sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
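
# Illustrative example of the builder above: in a dialect that registers
# JSON_EXTRACT through build_extract_json_with_path (see Parser.FUNCTIONS),
# JSON_EXTRACT(col, '$.a', '$.b') parses to an exp.JSONExtract whose
# "expressions" arg carries the trailing paths (args[2:]).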

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
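
    Example:
        A minimal usage sketch (tokenize first, then parse; one syntax tree
        is produced per statement)::

            from sqlglot.parser import Parser
            from sqlglot.tokens import Tokenizer

            tokens = Tokenizer().tokenize("SELECT 1")
            Parser().parse(tokens, "SELECT 1")  # -> [exp.Select(...)]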
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
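
    # Illustrative example: COALESCE, IFNULL and NVL all map to build_coalesce
    # above, so NVL(a, b) parses to the same exp.Coalesce node as COALESCE(a, b).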

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }
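
    # Illustrative note: this mapping supports unsigned type modifiers, e.g.
    # MySQL's INT UNSIGNED can be remapped from TokenType.INT to TokenType.UINT.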

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }
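
    # Illustrative note: join-related tokens are excluded above so that in
    # "SELECT * FROM t LEFT JOIN u ON ..." the LEFT token starts a join rather
    # than being consumed as an alias for t.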

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
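
    # Illustrative examples of the two lambda forms handled above:
    #   x -> x + 1    parses to exp.Lambda(this=x + 1, expressions=[x])
    #   key => value  parses to exp.Kwarg(this=key, expression=value)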

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }
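
    # Illustrative column-operator examples:
    #   col::INT     -> exp.Cast              (DCOLON)
    #   col->'$.a'   -> exp.JSONExtract       (ARROW)
    #   col->>'$.a'  -> exp.JSONExtractScalar (DARROW)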

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
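
    # Illustrative note: statements are dispatched on their leading token, e.g.
    # a statement starting with CREATE is routed to _parse_create() above.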

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }
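
    # Illustrative predicates driven by RANGE_PARSERS:
    #   a BETWEEN 1 AND 2  -> exp.Between (via _parse_between)
    #   a IN (1, 2)        -> exp.In      (via _parse_in)
    #   a LIKE 'x%'        -> exp.Like    (via binary_range_parser)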

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
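
    # Illustrative note: CREATE-level properties are parsed via the table above,
    # e.g. ENGINE=InnoDB -> exp.EngineProperty and TEMPORARY -> exp.TemporaryProperty.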
"REMOTE": lambda self: self._parse_remote_with_connection(), 1045 "RETURNS": lambda self: self._parse_returns(), 1046 "STRICT": lambda self: self.expression(exp.StrictProperty), 1047 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1048 "ROW": lambda self: self._parse_row(), 1049 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1050 "SAMPLE": lambda self: self.expression( 1051 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1052 ), 1053 "SECURE": lambda self: self.expression(exp.SecureProperty), 1054 "SECURITY": lambda self: self._parse_security(), 1055 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1056 "SETTINGS": lambda self: self._parse_settings_property(), 1057 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1058 "SORTKEY": lambda self: self._parse_sortkey(), 1059 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1060 "STABLE": lambda self: self.expression( 1061 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1062 ), 1063 "STORED": lambda self: self._parse_stored(), 1064 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1065 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1066 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1067 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1068 "TO": lambda self: self._parse_to_table(), 1069 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1070 "TRANSFORM": lambda self: self.expression( 1071 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1072 ), 1073 "TTL": lambda self: self._parse_ttl(), 1074 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1075 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1076 "VOLATILE": lambda self: self._parse_volatile_property(), 1077 "WITH": lambda self: self._parse_with_property(), 1078 } 1079 1080 CONSTRAINT_PARSERS = { 1081 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1082 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1083 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1084 "CHARACTER SET": lambda self: self.expression( 1085 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1086 ), 1087 "CHECK": lambda self: self.expression( 1088 exp.CheckColumnConstraint, 1089 this=self._parse_wrapped(self._parse_assignment), 1090 enforced=self._match_text_seq("ENFORCED"), 1091 ), 1092 "COLLATE": lambda self: self.expression( 1093 exp.CollateColumnConstraint, 1094 this=self._parse_identifier() or self._parse_column(), 1095 ), 1096 "COMMENT": lambda self: self.expression( 1097 exp.CommentColumnConstraint, this=self._parse_string() 1098 ), 1099 "COMPRESS": lambda self: self._parse_compress(), 1100 "CLUSTERED": lambda self: self.expression( 1101 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1102 ), 1103 "NONCLUSTERED": lambda self: self.expression( 1104 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1105 ), 1106 "DEFAULT": lambda self: self.expression( 1107 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1108 ), 1109 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1110 "EPHEMERAL": lambda self: self.expression( 1111 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1112 ), 1113 
"EXCLUDE": lambda self: self.expression( 1114 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1115 ), 1116 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1117 "FORMAT": lambda self: self.expression( 1118 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1119 ), 1120 "GENERATED": lambda self: self._parse_generated_as_identity(), 1121 "IDENTITY": lambda self: self._parse_auto_increment(), 1122 "INLINE": lambda self: self._parse_inline(), 1123 "LIKE": lambda self: self._parse_create_like(), 1124 "NOT": lambda self: self._parse_not_constraint(), 1125 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1126 "ON": lambda self: ( 1127 self._match(TokenType.UPDATE) 1128 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1129 ) 1130 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1131 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1132 "PERIOD": lambda self: self._parse_period_for_system_time(), 1133 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1134 "REFERENCES": lambda self: self._parse_references(match=False), 1135 "TITLE": lambda self: self.expression( 1136 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1137 ), 1138 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1139 "UNIQUE": lambda self: self._parse_unique(), 1140 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1141 "WATERMARK": lambda self: self.expression( 1142 exp.WatermarkColumnConstraint, 1143 this=self._match(TokenType.FOR) and self._parse_column(), 1144 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1145 ), 1146 "WITH": lambda self: self.expression( 1147 exp.Properties, expressions=self._parse_wrapped_properties() 1148 ), 1149 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1150 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 } 1152 1153 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1154 if not self._match(TokenType.L_PAREN, advance=False): 1155 # Partitioning by bucket or truncate follows the syntax: 1156 # PARTITION BY (BUCKET(..) 

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
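
    # Illustrative note: FUNCTION_PARSERS covers functions whose argument syntax
    # is non-standard, e.g.:
    #   CAST(x AS INT)         -> _parse_cast -> exp.Cast
    #   EXTRACT(YEAR FROM d)   -> _parse_extract
    #   TRIM(BOTH ' ' FROM s)  -> _parse_trim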
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1247 "GAP_FILL": lambda self: self._parse_gap_fill(), 1248 "JSON_OBJECT": lambda self: self._parse_json_object(), 1249 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1250 "JSON_TABLE": lambda self: self._parse_json_table(), 1251 "MATCH": lambda self: self._parse_match_against(), 1252 "NORMALIZE": lambda self: self._parse_normalize(), 1253 "OPENJSON": lambda self: self._parse_open_json(), 1254 "OVERLAY": lambda self: self._parse_overlay(), 1255 "POSITION": lambda self: self._parse_position(), 1256 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1257 "STRING_AGG": lambda self: self._parse_string_agg(), 1258 "SUBSTRING": lambda self: self._parse_substring(), 1259 "TRIM": lambda self: self._parse_trim(), 1260 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1261 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1262 "XMLELEMENT": lambda self: self.expression( 1263 exp.XMLElement, 1264 this=self._match_text_seq("NAME") and self._parse_id_var(), 1265 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1266 ), 1267 "XMLTABLE": lambda self: self._parse_xml_table(), 1268 } 1269 1270 QUERY_MODIFIER_PARSERS = { 1271 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1272 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1273 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1274 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1275 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1276 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1277 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1278 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1279 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1280 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1281 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1282 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1283 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1284 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1285 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.CLUSTER_BY: lambda self: ( 1287 "cluster", 1288 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1289 ), 1290 TokenType.DISTRIBUTE_BY: lambda self: ( 1291 "distribute", 1292 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1293 ), 1294 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1295 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1296 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1297 } 1298 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1299 1300 SET_PARSERS = { 1301 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1302 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1303 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1304 "TRANSACTION": lambda self: self._parse_set_transaction(), 1305 } 1306 1307 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1308 1309 TYPE_LITERAL_PARSERS = { 1310 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1311 } 1312 1313 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }
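
    # Illustrative note: ODBC escape literals such as {d '2024-01-01'} map their
    # prefix through ODBC_DATETIME_LITERALS, here "d" -> exp.Date.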

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
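
    # Illustrative note: parse() produces one tree per statement, e.g. parsing
    # "SELECT 1; SELECT 2" yields two expressions, since _parse below chunks the
    # token stream on semicolons.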
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
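
        Example:
            A minimal sketch, assuming the default dialect; a ParseError is
            raised if the tokens cannot be parsed into the requested type:

            >>> from sqlglot import exp
            >>> from sqlglot.tokens import Tokenizer
            >>> sql = "SELECT a FROM t"
            >>> trees = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)
            >>> isinstance(trees[0], exp.Select)
            True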
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
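
        Example:
            A minimal sketch with hypothetical arguments:

            >>> parser = Parser()
            >>> parser.expression(exp.Not, this=exp.column("a")).sql()
            'NOT a'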
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
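
    # For example (a sketch based on the code above), "DROP TABLE IF EXISTS db.t CASCADE"
    # is parsed into exp.Drop(kind="TABLE", exists=True, cascade=True) with `this` set to
    # the db.t table reference, while an unrecognized DROP target falls back to a Command.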

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
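
    # For instance (a hedged sketch), "INCREMENT BY 2 START WITH 10 CACHE 5" yields an
    # exp.SequenceProperties node with increment=2, start=10 and cache=5; any remaining
    # keyword options are collected into its `options` list.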
self._match_texts(("MIN", "MINIMUM")), 2222 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2223 } 2224 2225 if self._match_texts(self.PROPERTY_PARSERS): 2226 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2227 try: 2228 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2229 except TypeError: 2230 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2231 2232 return None 2233 2234 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2235 return self._parse_wrapped_csv(self._parse_property) 2236 2237 def _parse_property(self) -> t.Optional[exp.Expression]: 2238 if self._match_texts(self.PROPERTY_PARSERS): 2239 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2240 2241 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2242 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2243 2244 if self._match_text_seq("COMPOUND", "SORTKEY"): 2245 return self._parse_sortkey(compound=True) 2246 2247 if self._match_text_seq("SQL", "SECURITY"): 2248 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2249 2250 index = self._index 2251 2252 seq_props = self._parse_sequence_properties() 2253 if seq_props: 2254 return seq_props 2255 2256 self._retreat(index) 2257 key = self._parse_column() 2258 2259 if not self._match(TokenType.EQ): 2260 self._retreat(index) 2261 return None 2262 2263 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2264 if isinstance(key, exp.Column): 2265 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2266 2267 value = self._parse_bitwise() or self._parse_var(any_token=True) 2268 2269 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2270 if isinstance(value, exp.Column): 2271 value = exp.var(value.name) 2272 2273 return self.expression(exp.Property, this=key, value=value) 2274 2275 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2276 if self._match_text_seq("BY"): 2277 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2278 2279 self._match(TokenType.ALIAS) 2280 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2281 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2282 2283 return self.expression( 2284 exp.FileFormatProperty, 2285 this=( 2286 self.expression( 2287 exp.InputOutputFormat, 2288 input_format=input_format, 2289 output_format=output_format, 2290 ) 2291 if input_format or output_format 2292 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2293 ), 2294 hive_format=True, 2295 ) 2296 2297 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2298 field = self._parse_field() 2299 if isinstance(field, exp.Identifier) and not field.quoted: 2300 field = exp.var(field) 2301 2302 return field 2303 2304 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2305 self._match(TokenType.EQ) 2306 self._match(TokenType.ALIAS) 2307 2308 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2309 2310 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2311 properties = [] 2312 while True: 2313 if before: 2314 prop = self._parse_property_before() 2315 else: 2316 prop = self._parse_property() 2317 if not prop: 2318 break 2319 for p in ensure_list(prop): 2320 properties.append(p) 

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
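
    # A hedged example of the retention-period shape handled above: T-SQL's
    # HISTORY_RETENTION_PERIOD = 90 DAYS is captured as exp.var("90 DAYS"),
    # while HISTORY_RETENTION_PERIOD = INFINITE becomes exp.var("INFINITE").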

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
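
    # Hedged examples of the Postgres bounds handled above:
    #     PARTITION OF parent FOR VALUES FROM (MINVALUE) TO (10)
    #     PARTITION OF parent FOR VALUES WITH (MODULUS 4, REMAINDER 0)
    # The first form fills from_expressions/to_expressions; the second sets
    # `this` to the modulus and `expression` to the remainder.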

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
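
    # A hedged sketch of the Oracle-style statement handled above:
    #     INSERT ALL
    #       WHEN c > 0 THEN INTO t1 VALUES (c)
    #       ELSE INTO t2 VALUES (c)
    #     SELECT c FROM src
    # Each WHEN/ELSE branch becomes an exp.ConditionalInsert, and the trailing
    # source (here the SELECT) is parsed into the `source` arg of exp.MultitableInserts.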

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
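
    # A hedged example of the multiple-table form handled by _parse_delete:
    #     DELETE t1, t2 FROM t1 INNER JOIN t2 ON t1.id = t2.id WHERE t1.x > 0
    # Here `tables` captures [t1, t2] and `this` holds the joined FROM target.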

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
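
    # For example (hedged), "VALUES (1, 2), (3, 4)" parses each parenthesized row
    # into an exp.Tuple, while the bare form "VALUES 1, 2" (where supported)
    # produces one single-element tuple per row.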

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from"):
                    select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. a join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports a leading FROM, e.g. FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
instead becomes the parent node 3353 self._match_r_paren() 3354 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3355 elif self._match(TokenType.VALUES, advance=False): 3356 this = self._parse_derived_table_values() 3357 elif from_: 3358 this = exp.select("*").from_(from_.this, copy=False) 3359 elif self._match(TokenType.SUMMARIZE): 3360 table = self._match(TokenType.TABLE) 3361 this = self._parse_select() or self._parse_string() or self._parse_table() 3362 return self.expression(exp.Summarize, this=this, table=table) 3363 elif self._match(TokenType.DESCRIBE): 3364 this = self._parse_describe() 3365 elif self._match_text_seq("STREAM"): 3366 this = self._parse_function() 3367 if this: 3368 this = self.expression(exp.Stream, this=this) 3369 else: 3370 self._retreat(self._index - 1) 3371 else: 3372 this = None 3373 3374 return self._parse_set_operations(this) if parse_set_operation else this 3375 3376 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3377 self._match_text_seq("SEARCH") 3378 3379 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3380 3381 if not kind: 3382 return None 3383 3384 self._match_text_seq("FIRST", "BY") 3385 3386 return self.expression( 3387 exp.RecursiveWithSearch, 3388 kind=kind, 3389 this=self._parse_id_var(), 3390 expression=self._match_text_seq("SET") and self._parse_id_var(), 3391 using=self._match_text_seq("USING") and self._parse_id_var(), 3392 ) 3393 3394 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3395 if not skip_with_token and not self._match(TokenType.WITH): 3396 return None 3397 3398 comments = self._prev_comments 3399 recursive = self._match(TokenType.RECURSIVE) 3400 3401 last_comments = None 3402 expressions = [] 3403 while True: 3404 cte = self._parse_cte() 3405 if isinstance(cte, exp.CTE): 3406 expressions.append(cte) 3407 if last_comments: 3408 cte.add_comments(last_comments) 3409 3410 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3411 break 3412 else: 3413 self._match(TokenType.WITH) 3414 3415 last_comments = self._prev_comments 3416 3417 return self.expression( 3418 exp.With, 3419 comments=comments, 3420 expressions=expressions, 3421 recursive=recursive, 3422 search=self._parse_recursive_with_search(), 3423 ) 3424 3425 def _parse_cte(self) -> t.Optional[exp.CTE]: 3426 index = self._index 3427 3428 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3429 if not alias or not alias.this: 3430 self.raise_error("Expected CTE to have alias") 3431 3432 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3433 self._retreat(index) 3434 return None 3435 3436 comments = self._prev_comments 3437 3438 if self._match_text_seq("NOT", "MATERIALIZED"): 3439 materialized = False 3440 elif self._match_text_seq("MATERIALIZED"): 3441 materialized = True 3442 else: 3443 materialized = None 3444 3445 cte = self.expression( 3446 exp.CTE, 3447 this=self._parse_wrapped(self._parse_statement), 3448 alias=alias, 3449 materialized=materialized, 3450 comments=comments, 3451 ) 3452 3453 values = cte.this 3454 if isinstance(values, exp.Values): 3455 if values.alias: 3456 cte.set("this", exp.select("*").from_(values)) 3457 else: 3458 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3459 3460 return cte 3461 3462 def _parse_table_alias( 3463 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3464 ) -> t.Optional[exp.TableAlias]: 3465 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3466 # so this section tries to parse the clause version and if it fails, it treats the token 3467 # as an identifier (alias) 3468 if self._can_parse_limit_or_offset(): 3469 return None 3470 3471 any_token = self._match(TokenType.ALIAS) 3472 alias = ( 3473 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3474 or self._parse_string_as_identifier() 3475 ) 3476 3477 index = self._index 3478 if self._match(TokenType.L_PAREN): 3479 columns = self._parse_csv(self._parse_function_parameter) 3480 self._match_r_paren() if columns else self._retreat(index) 3481 else: 3482 columns = None 3483 3484 if not alias and not columns: 3485 return None 3486 3487 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3488 3489 # We bubble up comments from the Identifier to the TableAlias 3490 if isinstance(alias, exp.Identifier): 3491 table_alias.add_comments(alias.pop_comments()) 3492 3493 return table_alias 3494 3495 def _parse_subquery( 3496 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3497 ) -> t.Optional[exp.Subquery]: 3498 if not this: 3499 return None 3500 3501 return self.expression( 3502 exp.Subquery, 3503 this=this, 3504 pivots=self._parse_pivots(), 3505 alias=self._parse_table_alias() if parse_alias else None, 3506 sample=self._parse_table_sample(), 3507 ) 3508 3509 def _implicit_unnests_to_explicit(self, this: E) -> E: 3510 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3511 3512 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3513 for i, join in enumerate(this.args.get("joins") or []): 3514 table = join.this 3515 normalized_table = table.copy() 3516 normalized_table.meta["maybe_column"] = True 3517 normalized_table = _norm(normalized_table, dialect=self.dialect) 3518 3519 if isinstance(table, exp.Table) and not join.args.get("on"): 3520 if normalized_table.parts[0].name in refs: 3521 table_as_column = table.to_column() 3522 unnest = exp.Unnest(expressions=[table_as_column]) 3523 3524 # Table.to_column creates a parent Alias node that we want to convert to 3525 # a TableAlias and attach to the Unnest, so it matches the parser's output 3526 if isinstance(table.args.get("alias"), exp.TableAlias): 3527 table_as_column.replace(table_as_column.this) 3528 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3529 3530 table.replace(unnest) 3531 3532 refs.add(normalized_table.alias_or_name) 3533 3534 return this 3535 3536 def _parse_query_modifiers( 3537 self, this: t.Optional[exp.Expression] 3538 ) -> t.Optional[exp.Expression]: 3539 if isinstance(this, self.MODIFIABLES): 3540 for join in self._parse_joins(): 3541 this.append("joins", join) 3542 for lateral in iter(self._parse_lateral, None): 3543 this.append("laterals", lateral) 3544 3545 while True: 3546 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3547 modifier_token = self._curr 3548 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3549 key, expression = parser(self) 3550 3551 if expression: 3552 if this.args.get(key): 3553 self.raise_error( 3554 f"Found multiple '{modifier_token.text.upper()}' clauses", 3555 token=modifier_token, 3556 ) 3557 3558 this.set(key, expression) 3559 if key == "limit": 3560 offset = expression.args.pop("offset", None) 3561 3562 if offset: 3563 offset = exp.Offset(expression=offset) 3564 this.set("offset", offset) 3565 3566 limit_by_expressions = expression.expressions 3567 
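# Illustrative sketch (not part of the source): the two statements below finish
# splitting a ClickHouse-style "LIMIT <offset>, <count> BY <exprs>" apart. The
# embedded offset was popped into a standalone exp.Offset above, and the BY
# expressions now move from the Limit onto that Offset. Assuming only the
# public API, something like:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT * FROM t LIMIT 2, 10 BY domain", read="clickhouse")
#     assert isinstance(ast.args.get("offset"), exp.Offset)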
expression.set("expressions", None) 3568 offset.set("expressions", limit_by_expressions) 3569 continue 3570 break 3571 3572 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3573 this = self._implicit_unnests_to_explicit(this) 3574 3575 return this 3576 3577 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3578 start = self._curr 3579 while self._curr: 3580 self._advance() 3581 3582 end = self._tokens[self._index - 1] 3583 return exp.Hint(expressions=[self._find_sql(start, end)]) 3584 3585 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3586 return self._parse_function_call() 3587 3588 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3589 start_index = self._index 3590 should_fallback_to_string = False 3591 3592 hints = [] 3593 try: 3594 for hint in iter( 3595 lambda: self._parse_csv( 3596 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3597 ), 3598 [], 3599 ): 3600 hints.extend(hint) 3601 except ParseError: 3602 should_fallback_to_string = True 3603 3604 if should_fallback_to_string or self._curr: 3605 self._retreat(start_index) 3606 return self._parse_hint_fallback_to_string() 3607 3608 return self.expression(exp.Hint, expressions=hints) 3609 3610 def _parse_hint(self) -> t.Optional[exp.Hint]: 3611 if self._match(TokenType.HINT) and self._prev_comments: 3612 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3613 3614 return None 3615 3616 def _parse_into(self) -> t.Optional[exp.Into]: 3617 if not self._match(TokenType.INTO): 3618 return None 3619 3620 temp = self._match(TokenType.TEMPORARY) 3621 unlogged = self._match_text_seq("UNLOGGED") 3622 self._match(TokenType.TABLE) 3623 3624 return self.expression( 3625 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3626 ) 3627 3628 def _parse_from( 3629 self, 3630 joins: bool = False, 3631 skip_from_token: bool = False, 3632 consume_pipe: bool = False, 3633 ) -> t.Optional[exp.From]: 3634 if not skip_from_token and not self._match(TokenType.FROM): 3635 return None 3636 3637 return self.expression( 3638 exp.From, 3639 comments=self._prev_comments, 3640 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3641 ) 3642 3643 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3644 return self.expression( 3645 exp.MatchRecognizeMeasure, 3646 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3647 this=self._parse_expression(), 3648 ) 3649 3650 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3651 if not self._match(TokenType.MATCH_RECOGNIZE): 3652 return None 3653 3654 self._match_l_paren() 3655 3656 partition = self._parse_partition_by() 3657 order = self._parse_order() 3658 3659 measures = ( 3660 self._parse_csv(self._parse_match_recognize_measure) 3661 if self._match_text_seq("MEASURES") 3662 else None 3663 ) 3664 3665 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3666 rows = exp.var("ONE ROW PER MATCH") 3667 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3668 text = "ALL ROWS PER MATCH" 3669 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3670 text += " SHOW EMPTY MATCHES" 3671 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3672 text += " OMIT EMPTY MATCHES" 3673 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3674 text += " WITH UNMATCHED ROWS" 3675 rows = exp.var(text) 3676 else: 3677 rows = None 3678 3679 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3680 text = "AFTER 
MATCH SKIP" 3681 if self._match_text_seq("PAST", "LAST", "ROW"): 3682 text += " PAST LAST ROW" 3683 elif self._match_text_seq("TO", "NEXT", "ROW"): 3684 text += " TO NEXT ROW" 3685 elif self._match_text_seq("TO", "FIRST"): 3686 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3687 elif self._match_text_seq("TO", "LAST"): 3688 text += f" TO LAST {self._advance_any().text}" # type: ignore 3689 after = exp.var(text) 3690 else: 3691 after = None 3692 3693 if self._match_text_seq("PATTERN"): 3694 self._match_l_paren() 3695 3696 if not self._curr: 3697 self.raise_error("Expecting )", self._curr) 3698 3699 paren = 1 3700 start = self._curr 3701 3702 while self._curr and paren > 0: 3703 if self._curr.token_type == TokenType.L_PAREN: 3704 paren += 1 3705 if self._curr.token_type == TokenType.R_PAREN: 3706 paren -= 1 3707 3708 end = self._prev 3709 self._advance() 3710 3711 if paren > 0: 3712 self.raise_error("Expecting )", self._curr) 3713 3714 pattern = exp.var(self._find_sql(start, end)) 3715 else: 3716 pattern = None 3717 3718 define = ( 3719 self._parse_csv(self._parse_name_as_expression) 3720 if self._match_text_seq("DEFINE") 3721 else None 3722 ) 3723 3724 self._match_r_paren() 3725 3726 return self.expression( 3727 exp.MatchRecognize, 3728 partition_by=partition, 3729 order=order, 3730 measures=measures, 3731 rows=rows, 3732 after=after, 3733 pattern=pattern, 3734 define=define, 3735 alias=self._parse_table_alias(), 3736 ) 3737 3738 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3739 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3740 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3741 cross_apply = False 3742 3743 if cross_apply is not None: 3744 this = self._parse_select(table=True) 3745 view = None 3746 outer = None 3747 elif self._match(TokenType.LATERAL): 3748 this = self._parse_select(table=True) 3749 view = self._match(TokenType.VIEW) 3750 outer = self._match(TokenType.OUTER) 3751 else: 3752 return None 3753 3754 if not this: 3755 this = ( 3756 self._parse_unnest() 3757 or self._parse_function() 3758 or self._parse_id_var(any_token=False) 3759 ) 3760 3761 while self._match(TokenType.DOT): 3762 this = exp.Dot( 3763 this=this, 3764 expression=self._parse_function() or self._parse_id_var(any_token=False), 3765 ) 3766 3767 ordinality: t.Optional[bool] = None 3768 3769 if view: 3770 table = self._parse_id_var(any_token=False) 3771 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3772 table_alias: t.Optional[exp.TableAlias] = self.expression( 3773 exp.TableAlias, this=table, columns=columns 3774 ) 3775 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3776 # We move the alias from the lateral's child node to the lateral itself 3777 table_alias = this.args["alias"].pop() 3778 else: 3779 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3780 table_alias = self._parse_table_alias() 3781 3782 return self.expression( 3783 exp.Lateral, 3784 this=this, 3785 view=view, 3786 outer=outer, 3787 alias=table_alias, 3788 cross_apply=cross_apply, 3789 ordinality=ordinality, 3790 ) 3791 3792 def _parse_join_parts( 3793 self, 3794 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3795 return ( 3796 self._match_set(self.JOIN_METHODS) and self._prev, 3797 self._match_set(self.JOIN_SIDES) and self._prev, 3798 self._match_set(self.JOIN_KINDS) and self._prev, 3799 ) 3800 3801 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3802 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3803 this = self._parse_column() 3804 if isinstance(this, exp.Column): 3805 return this.this 3806 return this 3807 3808 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3809 3810 def _parse_join( 3811 self, skip_join_token: bool = False, parse_bracket: bool = False 3812 ) -> t.Optional[exp.Join]: 3813 if self._match(TokenType.COMMA): 3814 table = self._try_parse(self._parse_table) 3815 cross_join = self.expression(exp.Join, this=table) if table else None 3816 3817 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3818 cross_join.set("kind", "CROSS") 3819 3820 return cross_join 3821 3822 index = self._index 3823 method, side, kind = self._parse_join_parts() 3824 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3825 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3826 join_comments = self._prev_comments 3827 3828 if not skip_join_token and not join: 3829 self._retreat(index) 3830 kind = None 3831 method = None 3832 side = None 3833 3834 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3835 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3836 3837 if not skip_join_token and not join and not outer_apply and not cross_apply: 3838 return None 3839 3840 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3841 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3842 kwargs["expressions"] = self._parse_csv( 3843 lambda: self._parse_table(parse_bracket=parse_bracket) 3844 ) 3845 3846 if method: 3847 kwargs["method"] = method.text 3848 if side: 3849 kwargs["side"] = side.text 3850 if kind: 3851 kwargs["kind"] = kind.text 3852 if hint: 3853 kwargs["hint"] = hint 3854 3855 if self._match(TokenType.MATCH_CONDITION): 3856 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3857 3858 if self._match(TokenType.ON): 3859 kwargs["on"] = self._parse_assignment() 3860 elif self._match(TokenType.USING): 3861 kwargs["using"] = self._parse_using_identifiers() 3862 elif ( 3863 not method 3864 and not (outer_apply or cross_apply) 3865 and not isinstance(kwargs["this"], exp.Unnest) 3866 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3867 ): 3868 index = self._index 3869 joins: t.Optional[list] = list(self._parse_joins()) 3870 3871 if joins and self._match(TokenType.ON): 3872 kwargs["on"] = self._parse_assignment() 3873 elif joins and self._match(TokenType.USING): 3874 kwargs["using"] = self._parse_using_identifiers() 3875 else: 3876 joins = None 3877 self._retreat(index) 3878 3879 kwargs["this"].set("joins", joins if joins else None) 3880 3881 kwargs["pivots"] = self._parse_pivots() 3882 3883 comments = [c for token in (method, side, kind) if token for c in token.comments] 3884 comments = (join_comments or []) + comments 3885 3886 if ( 3887 self.ADD_JOIN_ON_TRUE 3888 and not kwargs.get("on") 3889 and not kwargs.get("using") 3890 and not kwargs.get("method") 3891 and kwargs.get("kind") in (None, "INNER", "OUTER") 3892 ): 3893 kwargs["on"] = exp.true() 3894 3895 return self.expression(exp.Join, comments=comments, **kwargs) 3896 3897 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3898 this = self._parse_assignment() 3899 3900 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3901 return this 3902 3903 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3904 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3905 3906 return this 3907 3908 def _parse_index_params(self) -> exp.IndexParameters: 3909 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3910 3911 if self._match(TokenType.L_PAREN, advance=False): 3912 columns = self._parse_wrapped_csv(self._parse_with_operator) 3913 else: 3914 columns = None 3915 3916 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3917 partition_by = self._parse_partition_by() 3918 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3919 tablespace = ( 3920 self._parse_var(any_token=True) 3921 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3922 else None 3923 ) 3924 where = self._parse_where() 3925 3926 on = self._parse_field() if self._match(TokenType.ON) else None 3927 3928 return self.expression( 3929 exp.IndexParameters, 3930 using=using, 3931 columns=columns, 3932 include=include, 3933 partition_by=partition_by, 3934 where=where, 3935 with_storage=with_storage, 3936 tablespace=tablespace, 3937 on=on, 3938 ) 3939 3940 def _parse_index( 3941 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3942 ) -> t.Optional[exp.Index]: 3943 if index or anonymous: 3944 unique = None 3945 primary = None 3946 amp = None 3947 3948 self._match(TokenType.ON) 3949 self._match(TokenType.TABLE) # hive 3950 table = self._parse_table_parts(schema=True) 3951 else: 3952 unique = self._match(TokenType.UNIQUE) 3953 primary = self._match_text_seq("PRIMARY") 3954 amp = self._match_text_seq("AMP") 3955 3956 if not self._match(TokenType.INDEX): 3957 return None 3958 3959 index = self._parse_id_var() 3960 table = None 3961 3962 params = self._parse_index_params() 3963 3964 return self.expression( 3965 exp.Index, 3966 this=index, 3967 table=table, 3968 unique=unique, 3969 primary=primary, 3970 amp=amp, 3971 params=params, 3972 ) 3973 3974 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3975 hints: t.List[exp.Expression] = [] 3976 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3977 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3978 hints.append( 3979 self.expression( 3980 exp.WithTableHint, 3981 expressions=self._parse_csv( 3982 lambda: self._parse_function() or self._parse_var(any_token=True) 3983 ), 3984 ) 3985 ) 3986 self._match_r_paren() 3987 else: 3988 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3989 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3990 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3991 3992 self._match_set((TokenType.INDEX, TokenType.KEY)) 3993 if self._match(TokenType.FOR): 3994 hint.set("target", self._advance_any() and self._prev.text.upper()) 3995 3996 hint.set("expressions", self._parse_wrapped_id_vars()) 3997 hints.append(hint) 3998 3999 return hints or None 4000 4001 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4002 return ( 4003 (not schema and self._parse_function(optional_parens=False)) 4004 or self._parse_id_var(any_token=False) 4005 or self._parse_string_as_identifier() 4006 or self._parse_placeholder() 4007 ) 4008 4009 def _parse_table_parts( 4010 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4011 ) -> exp.Table: 4012 catalog = None 4013 db = None 4014 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4015 4016 while self._match(TokenType.DOT): 4017 
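# Illustrative sketch (not part of the source): each DOT iteration below shifts
# the previously-parsed parts left, so "cat.db.tbl" fills catalog, db and this,
# and any deeper dotted part nests into an exp.Dot. Assuming only the public API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     table = sqlglot.parse_one("SELECT * FROM cat.db.tbl").find(exp.Table)
#     assert (table.catalog, table.db, table.name) == ("cat", "db", "tbl")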
if catalog: 4018 # This allows nesting the table in arbitrarily many dot expressions if needed 4019 table = self.expression( 4020 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4021 ) 4022 else: 4023 catalog = db 4024 db = table 4025 # "" used for tsql FROM a..b case 4026 table = self._parse_table_part(schema=schema) or "" 4027 4028 if ( 4029 wildcard 4030 and self._is_connected() 4031 and (isinstance(table, exp.Identifier) or not table) 4032 and self._match(TokenType.STAR) 4033 ): 4034 if isinstance(table, exp.Identifier): 4035 table.args["this"] += "*" 4036 else: 4037 table = exp.Identifier(this="*") 4038 4039 # We bubble up comments from the Identifier to the Table 4040 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4041 4042 if is_db_reference: 4043 catalog = db 4044 db = table 4045 table = None 4046 4047 if not table and not is_db_reference: 4048 self.raise_error(f"Expected table name but got {self._curr}") 4049 if not db and is_db_reference: 4050 self.raise_error(f"Expected database name but got {self._curr}") 4051 4052 table = self.expression( 4053 exp.Table, 4054 comments=comments, 4055 this=table, 4056 db=db, 4057 catalog=catalog, 4058 ) 4059 4060 changes = self._parse_changes() 4061 if changes: 4062 table.set("changes", changes) 4063 4064 at_before = self._parse_historical_data() 4065 if at_before: 4066 table.set("when", at_before) 4067 4068 pivots = self._parse_pivots() 4069 if pivots: 4070 table.set("pivots", pivots) 4071 4072 return table 4073 4074 def _parse_table( 4075 self, 4076 schema: bool = False, 4077 joins: bool = False, 4078 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4079 parse_bracket: bool = False, 4080 is_db_reference: bool = False, 4081 parse_partition: bool = False, 4082 consume_pipe: bool = False, 4083 ) -> t.Optional[exp.Expression]: 4084 lateral = self._parse_lateral() 4085 if lateral: 4086 return lateral 4087 4088 unnest = self._parse_unnest() 4089 if unnest: 4090 return unnest 4091 4092 values = self._parse_derived_table_values() 4093 if values: 4094 return values 4095 4096 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4097 if subquery: 4098 if not subquery.args.get("pivots"): 4099 subquery.set("pivots", self._parse_pivots()) 4100 return subquery 4101 4102 bracket = parse_bracket and self._parse_bracket(None) 4103 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4104 4105 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4106 self._parse_table 4107 ) 4108 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4109 4110 only = self._match(TokenType.ONLY) 4111 4112 this = t.cast( 4113 exp.Expression, 4114 bracket 4115 or rows_from 4116 or self._parse_bracket( 4117 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4118 ), 4119 ) 4120 4121 if only: 4122 this.set("only", only) 4123 4124 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4125 self._match_text_seq("*") 4126 4127 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4128 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4129 this.set("partition", self._parse_partition()) 4130 4131 if schema: 4132 return self._parse_schema(this=this) 4133 4134 version = self._parse_version() 4135 4136 if version: 4137 this.set("version", version) 4138 4139 if self.dialect.ALIAS_POST_TABLESAMPLE: 4140 this.set("sample", self._parse_table_sample()) 4141 
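# Illustrative sketch (not part of the source): ALIAS_POST_TABLESAMPLE controls
# whether the sample clause was consumed just above (before the alias) or is
# consumed further down (after it); Postgres, for one, puts the alias first:
#
#     import sqlglot
#     from sqlglot import exp
#
#     sql = "SELECT * FROM t AS x TABLESAMPLE BERNOULLI (10)"
#     table = sqlglot.parse_one(sql, read="postgres").find(exp.Table)
#     assert table.alias == "x" and table.args.get("sample") is not None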
4142 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4143 if alias: 4144 this.set("alias", alias) 4145 4146 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4147 return self.expression( 4148 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4149 ) 4150 4151 this.set("hints", self._parse_table_hints()) 4152 4153 if not this.args.get("pivots"): 4154 this.set("pivots", self._parse_pivots()) 4155 4156 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4157 this.set("sample", self._parse_table_sample()) 4158 4159 if joins: 4160 for join in self._parse_joins(): 4161 this.append("joins", join) 4162 4163 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4164 this.set("ordinality", True) 4165 this.set("alias", self._parse_table_alias()) 4166 4167 return this 4168 4169 def _parse_version(self) -> t.Optional[exp.Version]: 4170 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4171 this = "TIMESTAMP" 4172 elif self._match(TokenType.VERSION_SNAPSHOT): 4173 this = "VERSION" 4174 else: 4175 return None 4176 4177 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4178 kind = self._prev.text.upper() 4179 start = self._parse_bitwise() 4180 self._match_texts(("TO", "AND")) 4181 end = self._parse_bitwise() 4182 expression: t.Optional[exp.Expression] = self.expression( 4183 exp.Tuple, expressions=[start, end] 4184 ) 4185 elif self._match_text_seq("CONTAINED", "IN"): 4186 kind = "CONTAINED IN" 4187 expression = self.expression( 4188 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4189 ) 4190 elif self._match(TokenType.ALL): 4191 kind = "ALL" 4192 expression = None 4193 else: 4194 self._match_text_seq("AS", "OF") 4195 kind = "AS OF" 4196 expression = self._parse_type() 4197 4198 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4199 4200 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4201 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4202 index = self._index 4203 historical_data = None 4204 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4205 this = self._prev.text.upper() 4206 kind = ( 4207 self._match(TokenType.L_PAREN) 4208 and self._match_texts(self.HISTORICAL_DATA_KIND) 4209 and self._prev.text.upper() 4210 ) 4211 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4212 4213 if expression: 4214 self._match_r_paren() 4215 historical_data = self.expression( 4216 exp.HistoricalData, this=this, kind=kind, expression=expression 4217 ) 4218 else: 4219 self._retreat(index) 4220 4221 return historical_data 4222 4223 def _parse_changes(self) -> t.Optional[exp.Changes]: 4224 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4225 return None 4226 4227 information = self._parse_var(any_token=True) 4228 self._match_r_paren() 4229 4230 return self.expression( 4231 exp.Changes, 4232 information=information, 4233 at_before=self._parse_historical_data(), 4234 end=self._parse_historical_data(), 4235 ) 4236 4237 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4238 if not self._match(TokenType.UNNEST): 4239 return None 4240 4241 expressions = self._parse_wrapped_csv(self._parse_equality) 4242 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4243 4244 alias = self._parse_table_alias() if with_alias else None 4245 4246 if alias: 4247 if self.dialect.UNNEST_COLUMN_ONLY: 4248 if alias.args.get("columns"): 4249 self.raise_error("Unexpected extra column alias in unnest.") 4250 4251 
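# Illustrative sketch (not part of the source): under UNNEST_COLUMN_ONLY
# (e.g. BigQuery), "UNNEST(x) AS y" names the produced column rather than the
# derived table, so the alias identifier is moved from TableAlias.this into
# TableAlias.columns right below:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x", read="bigquery")
#     alias = ast.find(exp.Unnest).args["alias"]
#     assert alias.args["columns"][0].name == "x" and alias.this is None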
alias.set("columns", [alias.this]) 4252 alias.set("this", None) 4253 4254 columns = alias.args.get("columns") or [] 4255 if offset and len(expressions) < len(columns): 4256 offset = columns.pop() 4257 4258 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4259 self._match(TokenType.ALIAS) 4260 offset = self._parse_id_var( 4261 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4262 ) or exp.to_identifier("offset") 4263 4264 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4265 4266 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4267 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4268 if not is_derived and not ( 4269 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4270 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4271 ): 4272 return None 4273 4274 expressions = self._parse_csv(self._parse_value) 4275 alias = self._parse_table_alias() 4276 4277 if is_derived: 4278 self._match_r_paren() 4279 4280 return self.expression( 4281 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4282 ) 4283 4284 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4285 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4286 as_modifier and self._match_text_seq("USING", "SAMPLE") 4287 ): 4288 return None 4289 4290 bucket_numerator = None 4291 bucket_denominator = None 4292 bucket_field = None 4293 percent = None 4294 size = None 4295 seed = None 4296 4297 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4298 matched_l_paren = self._match(TokenType.L_PAREN) 4299 4300 if self.TABLESAMPLE_CSV: 4301 num = None 4302 expressions = self._parse_csv(self._parse_primary) 4303 else: 4304 expressions = None 4305 num = ( 4306 self._parse_factor() 4307 if self._match(TokenType.NUMBER, advance=False) 4308 else self._parse_primary() or self._parse_placeholder() 4309 ) 4310 4311 if self._match_text_seq("BUCKET"): 4312 bucket_numerator = self._parse_number() 4313 self._match_text_seq("OUT", "OF") 4314 bucket_denominator = self._parse_number() 4315 self._match(TokenType.ON) 4316 bucket_field = self._parse_field() 4317 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4318 percent = num 4319 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4320 size = num 4321 else: 4322 percent = num 4323 4324 if matched_l_paren: 4325 self._match_r_paren() 4326 4327 if self._match(TokenType.L_PAREN): 4328 method = self._parse_var(upper=True) 4329 seed = self._match(TokenType.COMMA) and self._parse_number() 4330 self._match_r_paren() 4331 elif self._match_texts(("SEED", "REPEATABLE")): 4332 seed = self._parse_wrapped(self._parse_number) 4333 4334 if not method and self.DEFAULT_SAMPLING_METHOD: 4335 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4336 4337 return self.expression( 4338 exp.TableSample, 4339 expressions=expressions, 4340 method=method, 4341 bucket_numerator=bucket_numerator, 4342 bucket_denominator=bucket_denominator, 4343 bucket_field=bucket_field, 4344 percent=percent, 4345 size=size, 4346 seed=seed, 4347 ) 4348 4349 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4350 return list(iter(self._parse_pivot, None)) or None 4351 4352 def _parse_joins(self) -> t.Iterator[exp.Join]: 4353 return iter(self._parse_join, None) 4354 4355 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4356 if not self._match(TokenType.INTO): 4357 return None 
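# Illustrative sketch (not part of the source): this consumes the
# "INTO NAME <col> VALUE <col>, ..." tail of DuckDB's simplified UNPIVOT
# syntax; assuming DuckDB input, the result should surface as the Pivot's
# "into" arg:
#
#     import sqlglot
#     from sqlglot import exp
#
#     sql = "UNPIVOT sales ON jan, feb INTO NAME month VALUE amount"
#     pivot = sqlglot.parse_one(sql, read="duckdb").find(exp.Pivot)
#     # pivot.args["into"] should be an exp.UnpivotColumns node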
4358 4359 return self.expression( 4360 exp.UnpivotColumns, 4361 this=self._match_text_seq("NAME") and self._parse_column(), 4362 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4363 ) 4364 4365 # https://duckdb.org/docs/sql/statements/pivot 4366 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4367 def _parse_on() -> t.Optional[exp.Expression]: 4368 this = self._parse_bitwise() 4369 4370 if self._match(TokenType.IN): 4371 # PIVOT ... ON col IN (row_val1, row_val2) 4372 return self._parse_in(this) 4373 if self._match(TokenType.ALIAS, advance=False): 4374 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4375 return self._parse_alias(this) 4376 4377 return this 4378 4379 this = self._parse_table() 4380 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4381 into = self._parse_unpivot_columns() 4382 using = self._match(TokenType.USING) and self._parse_csv( 4383 lambda: self._parse_alias(self._parse_function()) 4384 ) 4385 group = self._parse_group() 4386 4387 return self.expression( 4388 exp.Pivot, 4389 this=this, 4390 expressions=expressions, 4391 using=using, 4392 group=group, 4393 unpivot=is_unpivot, 4394 into=into, 4395 ) 4396 4397 def _parse_pivot_in(self) -> exp.In: 4398 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4399 this = self._parse_select_or_expression() 4400 4401 self._match(TokenType.ALIAS) 4402 alias = self._parse_bitwise() 4403 if alias: 4404 if isinstance(alias, exp.Column) and not alias.db: 4405 alias = alias.this 4406 return self.expression(exp.PivotAlias, this=this, alias=alias) 4407 4408 return this 4409 4410 value = self._parse_column() 4411 4412 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4413 self.raise_error("Expecting IN (") 4414 4415 if self._match(TokenType.ANY): 4416 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4417 else: 4418 exprs = self._parse_csv(_parse_aliased_expression) 4419 4420 self._match_r_paren() 4421 return self.expression(exp.In, this=value, expressions=exprs) 4422 4423 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4424 func = self._parse_function() 4425 if not func: 4426 if self._prev and self._prev.token_type == TokenType.COMMA: 4427 return None 4428 self.raise_error("Expecting an aggregation function in PIVOT") 4429 4430 return self._parse_alias(func) 4431 4432 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4433 index = self._index 4434 include_nulls = None 4435 4436 if self._match(TokenType.PIVOT): 4437 unpivot = False 4438 elif self._match(TokenType.UNPIVOT): 4439 unpivot = True 4440 4441 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4442 if self._match_text_seq("INCLUDE", "NULLS"): 4443 include_nulls = True 4444 elif self._match_text_seq("EXCLUDE", "NULLS"): 4445 include_nulls = False 4446 else: 4447 return None 4448 4449 expressions = [] 4450 4451 if not self._match(TokenType.L_PAREN): 4452 self._retreat(index) 4453 return None 4454 4455 if unpivot: 4456 expressions = self._parse_csv(self._parse_column) 4457 else: 4458 expressions = self._parse_csv(self._parse_pivot_aggregation) 4459 4460 if not expressions: 4461 self.raise_error("Failed to parse PIVOT's aggregation list") 4462 4463 if not self._match(TokenType.FOR): 4464 self.raise_error("Expecting FOR") 4465 4466 fields = [] 4467 while True: 4468 field = self._try_parse(self._parse_pivot_in) 4469 if not field: 4470 break 4471 fields.append(field) 4472 4473 
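# Illustrative sketch (not part of the source): the loop above consumes one
# "<expr> IN (...)" clause per iteration, each becoming an exp.In entry in
# pivot.fields:
#
#     import sqlglot
#     from sqlglot import exp
#
#     sql = "SELECT * FROM t PIVOT(SUM(v) FOR year IN (2000, 2010))"
#     pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
#     assert isinstance(pivot.fields[0], exp.In)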
default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4474 self._parse_bitwise 4475 ) 4476 4477 group = self._parse_group() 4478 4479 self._match_r_paren() 4480 4481 pivot = self.expression( 4482 exp.Pivot, 4483 expressions=expressions, 4484 fields=fields, 4485 unpivot=unpivot, 4486 include_nulls=include_nulls, 4487 default_on_null=default_on_null, 4488 group=group, 4489 ) 4490 4491 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4492 pivot.set("alias", self._parse_table_alias()) 4493 4494 if not unpivot: 4495 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4496 4497 columns: t.List[exp.Expression] = [] 4498 all_fields = [] 4499 for pivot_field in pivot.fields: 4500 pivot_field_expressions = pivot_field.expressions 4501 4502 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4503 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4504 continue 4505 4506 all_fields.append( 4507 [ 4508 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4509 for fld in pivot_field_expressions 4510 ] 4511 ) 4512 4513 if all_fields: 4514 if names: 4515 all_fields.append(names) 4516 4517 # Generate all possible combinations of the pivot columns 4518 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4519 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4520 for fld_parts_tuple in itertools.product(*all_fields): 4521 fld_parts = list(fld_parts_tuple) 4522 4523 if names and self.PREFIXED_PIVOT_COLUMNS: 4524 # Move the "name" to the front of the list 4525 fld_parts.insert(0, fld_parts.pop(-1)) 4526 4527 columns.append(exp.to_identifier("_".join(fld_parts))) 4528 4529 pivot.set("columns", columns) 4530 4531 return pivot 4532 4533 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4534 return [agg.alias for agg in aggregations if agg.alias] 4535 4536 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4537 if not skip_where_token and not self._match(TokenType.PREWHERE): 4538 return None 4539 4540 return self.expression( 4541 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4542 ) 4543 4544 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4545 if not skip_where_token and not self._match(TokenType.WHERE): 4546 return None 4547 4548 return self.expression( 4549 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4550 ) 4551 4552 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4553 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4554 return None 4555 comments = self._prev_comments 4556 4557 elements: t.Dict[str, t.Any] = defaultdict(list) 4558 4559 if self._match(TokenType.ALL): 4560 elements["all"] = True 4561 elif self._match(TokenType.DISTINCT): 4562 elements["all"] = False 4563 4564 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4565 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4566 4567 while True: 4568 index = self._index 4569 4570 elements["expressions"].extend( 4571 self._parse_csv( 4572 lambda: None 4573 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4574 else self._parse_assignment() 4575 ) 4576 ) 4577 4578 before_with_index = self._index 4579 with_prefix = self._match(TokenType.WITH) 4580 4581 if 
self._match(TokenType.ROLLUP): 4582 elements["rollup"].append( 4583 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4584 ) 4585 elif self._match(TokenType.CUBE): 4586 elements["cube"].append( 4587 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4588 ) 4589 elif self._match(TokenType.GROUPING_SETS): 4590 elements["grouping_sets"].append( 4591 self.expression( 4592 exp.GroupingSets, 4593 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4594 ) 4595 ) 4596 elif self._match_text_seq("TOTALS"): 4597 elements["totals"] = True # type: ignore 4598 4599 if before_with_index <= self._index <= before_with_index + 1: 4600 self._retreat(before_with_index) 4601 break 4602 4603 if index == self._index: 4604 break 4605 4606 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4607 4608 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4609 return self.expression( 4610 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4611 ) 4612 4613 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4614 if self._match(TokenType.L_PAREN): 4615 grouping_set = self._parse_csv(self._parse_column) 4616 self._match_r_paren() 4617 return self.expression(exp.Tuple, expressions=grouping_set) 4618 4619 return self._parse_column() 4620 4621 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4622 if not skip_having_token and not self._match(TokenType.HAVING): 4623 return None 4624 return self.expression( 4625 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4626 ) 4627 4628 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4629 if not self._match(TokenType.QUALIFY): 4630 return None 4631 return self.expression(exp.Qualify, this=self._parse_assignment()) 4632 4633 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4634 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4635 exp.Prior, this=self._parse_bitwise() 4636 ) 4637 connect = self._parse_assignment() 4638 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4639 return connect 4640 4641 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4642 if skip_start_token: 4643 start = None 4644 elif self._match(TokenType.START_WITH): 4645 start = self._parse_assignment() 4646 else: 4647 return None 4648 4649 self._match(TokenType.CONNECT_BY) 4650 nocycle = self._match_text_seq("NOCYCLE") 4651 connect = self._parse_connect_with_prior() 4652 4653 if not start and self._match(TokenType.START_WITH): 4654 start = self._parse_assignment() 4655 4656 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4657 4658 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4659 this = self._parse_id_var(any_token=True) 4660 if self._match(TokenType.ALIAS): 4661 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4662 return this 4663 4664 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4665 if self._match_text_seq("INTERPOLATE"): 4666 return self._parse_wrapped_csv(self._parse_name_as_expression) 4667 return None 4668 4669 def _parse_order( 4670 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4671 ) -> t.Optional[exp.Expression]: 4672 siblings = None 4673 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4674 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4675 return this 4676 4677 siblings = True 4678 
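# Illustrative sketch (not part of the source): ORDER SIBLINGS BY (Oracle
# hierarchical queries) reuses the same exp.Order node, flagged via
# siblings=True:
#
#     import sqlglot
#     from sqlglot import exp
#
#     sql = (
#         "SELECT name FROM emp START WITH mgr IS NULL "
#         "CONNECT BY PRIOR id = mgr ORDER SIBLINGS BY name"
#     )
#     order = sqlglot.parse_one(sql, read="oracle").find(exp.Order)
#     assert order.args.get("siblings") is True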
4679 return self.expression( 4680 exp.Order, 4681 comments=self._prev_comments, 4682 this=this, 4683 expressions=self._parse_csv(self._parse_ordered), 4684 siblings=siblings, 4685 ) 4686 4687 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4688 if not self._match(token): 4689 return None 4690 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4691 4692 def _parse_ordered( 4693 self, parse_method: t.Optional[t.Callable] = None 4694 ) -> t.Optional[exp.Ordered]: 4695 this = parse_method() if parse_method else self._parse_assignment() 4696 if not this: 4697 return None 4698 4699 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4700 this = exp.var("ALL") 4701 4702 asc = self._match(TokenType.ASC) 4703 desc = self._match(TokenType.DESC) or (asc and False) 4704 4705 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4706 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4707 4708 nulls_first = is_nulls_first or False 4709 explicitly_null_ordered = is_nulls_first or is_nulls_last 4710 4711 if ( 4712 not explicitly_null_ordered 4713 and ( 4714 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4715 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4716 ) 4717 and self.dialect.NULL_ORDERING != "nulls_are_last" 4718 ): 4719 nulls_first = True 4720 4721 if self._match_text_seq("WITH", "FILL"): 4722 with_fill = self.expression( 4723 exp.WithFill, 4724 **{ # type: ignore 4725 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4726 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4727 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4728 "interpolate": self._parse_interpolate(), 4729 }, 4730 ) 4731 else: 4732 with_fill = None 4733 4734 return self.expression( 4735 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4736 ) 4737 4738 def _parse_limit_options(self) -> exp.LimitOptions: 4739 percent = self._match(TokenType.PERCENT) 4740 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4741 self._match_text_seq("ONLY") 4742 with_ties = self._match_text_seq("WITH", "TIES") 4743 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4744 4745 def _parse_limit( 4746 self, 4747 this: t.Optional[exp.Expression] = None, 4748 top: bool = False, 4749 skip_limit_token: bool = False, 4750 ) -> t.Optional[exp.Expression]: 4751 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4752 comments = self._prev_comments 4753 if top: 4754 limit_paren = self._match(TokenType.L_PAREN) 4755 expression = self._parse_term() if limit_paren else self._parse_number() 4756 4757 if limit_paren: 4758 self._match_r_paren() 4759 4760 limit_options = self._parse_limit_options() 4761 else: 4762 limit_options = None 4763 expression = self._parse_term() 4764 4765 if self._match(TokenType.COMMA): 4766 offset = expression 4767 expression = self._parse_term() 4768 else: 4769 offset = None 4770 4771 limit_exp = self.expression( 4772 exp.Limit, 4773 this=this, 4774 expression=expression, 4775 offset=offset, 4776 comments=comments, 4777 limit_options=limit_options, 4778 expressions=self._parse_limit_by(), 4779 ) 4780 4781 return limit_exp 4782 4783 if self._match(TokenType.FETCH): 4784 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4785 direction = self._prev.text.upper() if direction else "FIRST" 4786 4787 count = self._parse_field(tokens=self.FETCH_TOKENS) 4788 4789 return 
self.expression( 4790 exp.Fetch, 4791 direction=direction, 4792 count=count, 4793 limit_options=self._parse_limit_options(), 4794 ) 4795 4796 return this 4797 4798 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4799 if not self._match(TokenType.OFFSET): 4800 return this 4801 4802 count = self._parse_term() 4803 self._match_set((TokenType.ROW, TokenType.ROWS)) 4804 4805 return self.expression( 4806 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4807 ) 4808 4809 def _can_parse_limit_or_offset(self) -> bool: 4810 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4811 return False 4812 4813 index = self._index 4814 result = bool( 4815 self._try_parse(self._parse_limit, retreat=True) 4816 or self._try_parse(self._parse_offset, retreat=True) 4817 ) 4818 self._retreat(index) 4819 return result 4820 4821 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4822 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4823 4824 def _parse_locks(self) -> t.List[exp.Lock]: 4825 locks = [] 4826 while True: 4827 update, key = None, None 4828 if self._match_text_seq("FOR", "UPDATE"): 4829 update = True 4830 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4831 "LOCK", "IN", "SHARE", "MODE" 4832 ): 4833 update = False 4834 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4835 update, key = False, True 4836 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4837 update, key = True, True 4838 else: 4839 break 4840 4841 expressions = None 4842 if self._match_text_seq("OF"): 4843 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4844 4845 wait: t.Optional[bool | exp.Expression] = None 4846 if self._match_text_seq("NOWAIT"): 4847 wait = True 4848 elif self._match_text_seq("WAIT"): 4849 wait = self._parse_primary() 4850 elif self._match_text_seq("SKIP", "LOCKED"): 4851 wait = False 4852 4853 locks.append( 4854 self.expression( 4855 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4856 ) 4857 ) 4858 4859 return locks 4860 4861 def parse_set_operation( 4862 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4863 ) -> t.Optional[exp.Expression]: 4864 start = self._index 4865 _, side_token, kind_token = self._parse_join_parts() 4866 4867 side = side_token.text if side_token else None 4868 kind = kind_token.text if kind_token else None 4869 4870 if not self._match_set(self.SET_OPERATIONS): 4871 self._retreat(start) 4872 return None 4873 4874 token_type = self._prev.token_type 4875 4876 if token_type == TokenType.UNION: 4877 operation: t.Type[exp.SetOperation] = exp.Union 4878 elif token_type == TokenType.EXCEPT: 4879 operation = exp.Except 4880 else: 4881 operation = exp.Intersect 4882 4883 comments = self._prev.comments 4884 4885 if self._match(TokenType.DISTINCT): 4886 distinct: t.Optional[bool] = True 4887 elif self._match(TokenType.ALL): 4888 distinct = False 4889 else: 4890 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4891 if distinct is None: 4892 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4893 4894 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4895 "STRICT", "CORRESPONDING" 4896 ) 4897 if self._match_text_seq("CORRESPONDING"): 4898 by_name = True 4899 if not side and not kind: 4900 kind = "INNER" 4901 4902 on_column_list = None 4903 if by_name and self._match_texts(("ON", "BY")): 4904 on_column_list = 
self._parse_wrapped_csv(self._parse_column) 4905 4906 expression = self._parse_select( 4907 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4908 ) 4909 4910 return self.expression( 4911 operation, 4912 comments=comments, 4913 this=this, 4914 distinct=distinct, 4915 by_name=by_name, 4916 expression=expression, 4917 side=side, 4918 kind=kind, 4919 on=on_column_list, 4920 ) 4921 4922 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4923 while this: 4924 setop = self.parse_set_operation(this) 4925 if not setop: 4926 break 4927 this = setop 4928 4929 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4930 expression = this.expression 4931 4932 if expression: 4933 for arg in self.SET_OP_MODIFIERS: 4934 expr = expression.args.get(arg) 4935 if expr: 4936 this.set(arg, expr.pop()) 4937 4938 return this 4939 4940 def _parse_expression(self) -> t.Optional[exp.Expression]: 4941 return self._parse_alias(self._parse_assignment()) 4942 4943 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4944 this = self._parse_disjunction() 4945 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4946 # This allows us to parse <non-identifier token> := <expr> 4947 this = exp.column( 4948 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4949 ) 4950 4951 while self._match_set(self.ASSIGNMENT): 4952 if isinstance(this, exp.Column) and len(this.parts) == 1: 4953 this = this.this 4954 4955 this = self.expression( 4956 self.ASSIGNMENT[self._prev.token_type], 4957 this=this, 4958 comments=self._prev_comments, 4959 expression=self._parse_assignment(), 4960 ) 4961 4962 return this 4963 4964 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4965 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4966 4967 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4968 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4969 4970 def _parse_equality(self) -> t.Optional[exp.Expression]: 4971 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4972 4973 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4974 return self._parse_tokens(self._parse_range, self.COMPARISON) 4975 4976 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4977 this = this or self._parse_bitwise() 4978 negate = self._match(TokenType.NOT) 4979 4980 if self._match_set(self.RANGE_PARSERS): 4981 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4982 if not expression: 4983 return this 4984 4985 this = expression 4986 elif self._match(TokenType.ISNULL): 4987 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4988 4989 # Postgres supports ISNULL and NOTNULL for conditions. 
4990 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4991 if self._match(TokenType.NOTNULL): 4992 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4993 this = self.expression(exp.Not, this=this) 4994 4995 if negate: 4996 this = self._negate_range(this) 4997 4998 if self._match(TokenType.IS): 4999 this = self._parse_is(this) 5000 5001 return this 5002 5003 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5004 if not this: 5005 return this 5006 5007 return self.expression(exp.Not, this=this) 5008 5009 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5010 index = self._index - 1 5011 negate = self._match(TokenType.NOT) 5012 5013 if self._match_text_seq("DISTINCT", "FROM"): 5014 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5015 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5016 5017 if self._match(TokenType.JSON): 5018 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5019 5020 if self._match_text_seq("WITH"): 5021 _with = True 5022 elif self._match_text_seq("WITHOUT"): 5023 _with = False 5024 else: 5025 _with = None 5026 5027 unique = self._match(TokenType.UNIQUE) 5028 self._match_text_seq("KEYS") 5029 expression: t.Optional[exp.Expression] = self.expression( 5030 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5031 ) 5032 else: 5033 expression = self._parse_primary() or self._parse_null() 5034 if not expression: 5035 self._retreat(index) 5036 return None 5037 5038 this = self.expression(exp.Is, this=this, expression=expression) 5039 return self.expression(exp.Not, this=this) if negate else this 5040 5041 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5042 unnest = self._parse_unnest(with_alias=False) 5043 if unnest: 5044 this = self.expression(exp.In, this=this, unnest=unnest) 5045 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5046 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5047 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5048 5049 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5050 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5051 else: 5052 this = self.expression(exp.In, this=this, expressions=expressions) 5053 5054 if matched_l_paren: 5055 self._match_r_paren(this) 5056 elif not self._match(TokenType.R_BRACKET, expression=this): 5057 self.raise_error("Expecting ]") 5058 else: 5059 this = self.expression(exp.In, this=this, field=self._parse_column()) 5060 5061 return this 5062 5063 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5064 symmetric = None 5065 if self._match_text_seq("SYMMETRIC"): 5066 symmetric = True 5067 elif self._match_text_seq("ASYMMETRIC"): 5068 symmetric = False 5069 5070 low = self._parse_bitwise() 5071 self._match(TokenType.AND) 5072 high = self._parse_bitwise() 5073 5074 return self.expression( 5075 exp.Between, 5076 this=this, 5077 low=low, 5078 high=high, 5079 symmetric=symmetric, 5080 ) 5081 5082 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5083 if not self._match(TokenType.ESCAPE): 5084 return this 5085 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5086 5087 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5088 index = self._index 5089 5090 if not 
self._match(TokenType.INTERVAL) and match_interval: 5091 return None 5092 5093 if self._match(TokenType.STRING, advance=False): 5094 this = self._parse_primary() 5095 else: 5096 this = self._parse_term() 5097 5098 if not this or ( 5099 isinstance(this, exp.Column) 5100 and not this.table 5101 and not this.this.quoted 5102 and this.name.upper() == "IS" 5103 ): 5104 self._retreat(index) 5105 return None 5106 5107 unit = self._parse_function() or ( 5108 not self._match(TokenType.ALIAS, advance=False) 5109 and self._parse_var(any_token=True, upper=True) 5110 ) 5111 5112 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5113 # each INTERVAL expression into this canonical form so it's easy to transpile 5114 if this and this.is_number: 5115 this = exp.Literal.string(this.to_py()) 5116 elif this and this.is_string: 5117 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5118 if parts and unit: 5119 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5120 unit = None 5121 self._retreat(self._index - 1) 5122 5123 if len(parts) == 1: 5124 this = exp.Literal.string(parts[0][0]) 5125 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5126 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5127 unit = self.expression( 5128 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5129 ) 5130 5131 interval = self.expression(exp.Interval, this=this, unit=unit) 5132 5133 index = self._index 5134 self._match(TokenType.PLUS) 5135 5136 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5137 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5138 return self.expression( 5139 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5140 ) 5141 5142 self._retreat(index) 5143 return interval 5144 5145 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5146 this = self._parse_term() 5147 5148 while True: 5149 if self._match_set(self.BITWISE): 5150 this = self.expression( 5151 self.BITWISE[self._prev.token_type], 5152 this=this, 5153 expression=self._parse_term(), 5154 ) 5155 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5156 this = self.expression( 5157 exp.DPipe, 5158 this=this, 5159 expression=self._parse_term(), 5160 safe=not self.dialect.STRICT_STRING_CONCAT, 5161 ) 5162 elif self._match(TokenType.DQMARK): 5163 this = self.expression( 5164 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5165 ) 5166 elif self._match_pair(TokenType.LT, TokenType.LT): 5167 this = self.expression( 5168 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5169 ) 5170 elif self._match_pair(TokenType.GT, TokenType.GT): 5171 this = self.expression( 5172 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5173 ) 5174 else: 5175 break 5176 5177 return this 5178 5179 def _parse_term(self) -> t.Optional[exp.Expression]: 5180 this = self._parse_factor() 5181 5182 while self._match_set(self.TERM): 5183 klass = self.TERM[self._prev.token_type] 5184 comments = self._prev_comments 5185 expression = self._parse_factor() 5186 5187 this = self.expression(klass, this=this, comments=comments, expression=expression) 5188 5189 if isinstance(this, exp.Collate): 5190 expr = this.expression 5191 5192 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5193 # fallback to Identifier / Var 5194 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5195 ident = expr.this 5196 if 
isinstance(ident, exp.Identifier): 5197 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5198 5199 return this 5200 5201 def _parse_factor(self) -> t.Optional[exp.Expression]: 5202 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5203 this = parse_method() 5204 5205 while self._match_set(self.FACTOR): 5206 klass = self.FACTOR[self._prev.token_type] 5207 comments = self._prev_comments 5208 expression = parse_method() 5209 5210 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5211 self._retreat(self._index - 1) 5212 return this 5213 5214 this = self.expression(klass, this=this, comments=comments, expression=expression) 5215 5216 if isinstance(this, exp.Div): 5217 this.args["typed"] = self.dialect.TYPED_DIVISION 5218 this.args["safe"] = self.dialect.SAFE_DIVISION 5219 5220 return this 5221 5222 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5223 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5224 5225 def _parse_unary(self) -> t.Optional[exp.Expression]: 5226 if self._match_set(self.UNARY_PARSERS): 5227 return self.UNARY_PARSERS[self._prev.token_type](self) 5228 return self._parse_at_time_zone(self._parse_type()) 5229 5230 def _parse_type( 5231 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5232 ) -> t.Optional[exp.Expression]: 5233 interval = parse_interval and self._parse_interval() 5234 if interval: 5235 return interval 5236 5237 index = self._index 5238 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5239 5240 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5241 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5242 if isinstance(data_type, exp.Cast): 5243 # This constructor can contain ops directly after it, for instance struct unnesting: 5244 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5245 return self._parse_column_ops(data_type) 5246 5247 if data_type: 5248 index2 = self._index 5249 this = self._parse_primary() 5250 5251 if isinstance(this, exp.Literal): 5252 literal = this.name 5253 this = self._parse_column_ops(this) 5254 5255 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5256 if parser: 5257 return parser(self, this, data_type) 5258 5259 if ( 5260 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5261 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5262 and TIME_ZONE_RE.search(literal) 5263 ): 5264 data_type = exp.DataType.build("TIMESTAMPTZ") 5265 5266 return self.expression(exp.Cast, this=this, to=data_type) 5267 5268 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5269 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5270 # 5271 # If the index difference here is greater than 1, that means the parser itself must have 5272 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5273 # 5274 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5275 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5276 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5277 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5278 # 5279 # In these cases, we don't really want to return the converted type, but instead retreat 5280 # and try to parse a Column or Identifier in the section below.
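# A short sketch of the TYPE_CONVERTERS behavior described in the comment above, assuming
# Snowflake's bare-DECIMAL conversion (illustrative query, public sqlglot API only):
import sqlglot
# The bare DECIMAL keyword is expanded to DECIMAL(38, 0) by a TYPE_CONVERTERS callable
print(sqlglot.transpile("SELECT CAST(x AS DECIMAL)", read="snowflake")[0])
# SELECT CAST(x AS DECIMAL(38, 0))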
5281 if data_type.expressions and index2 - index > 1: 5282 self._retreat(index2) 5283 return self._parse_column_ops(data_type) 5284 5285 self._retreat(index) 5286 5287 if fallback_to_identifier: 5288 return self._parse_id_var() 5289 5290 this = self._parse_column() 5291 return this and self._parse_column_ops(this) 5292 5293 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5294 this = self._parse_type() 5295 if not this: 5296 return None 5297 5298 if isinstance(this, exp.Column) and not this.table: 5299 this = exp.var(this.name.upper()) 5300 5301 return self.expression( 5302 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5303 ) 5304 5305 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5306 type_name = identifier.name 5307 5308 while self._match(TokenType.DOT): 5309 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5310 5311 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5312 5313 def _parse_types( 5314 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5315 ) -> t.Optional[exp.Expression]: 5316 index = self._index 5317 5318 this: t.Optional[exp.Expression] = None 5319 prefix = self._match_text_seq("SYSUDTLIB", ".") 5320 5321 if self._match_set(self.TYPE_TOKENS): 5322 type_token = self._prev.token_type 5323 else: 5324 type_token = None 5325 identifier = allow_identifiers and self._parse_id_var( 5326 any_token=False, tokens=(TokenType.VAR,) 5327 ) 5328 if isinstance(identifier, exp.Identifier): 5329 try: 5330 tokens = self.dialect.tokenize(identifier.name) 5331 except TokenError: 5332 tokens = None 5333 5334 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5335 type_token = tokens[0].token_type 5336 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5337 this = self._parse_user_defined_type(identifier) 5338 else: 5339 self._retreat(self._index - 1) 5340 return None 5341 else: 5342 return None 5343 5344 if type_token == TokenType.PSEUDO_TYPE: 5345 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5346 5347 if type_token == TokenType.OBJECT_IDENTIFIER: 5348 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5349 5350 # https://materialize.com/docs/sql/types/map/ 5351 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5352 key_type = self._parse_types( 5353 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5354 ) 5355 if not self._match(TokenType.FARROW): 5356 self._retreat(index) 5357 return None 5358 5359 value_type = self._parse_types( 5360 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5361 ) 5362 if not self._match(TokenType.R_BRACKET): 5363 self._retreat(index) 5364 return None 5365 5366 return exp.DataType( 5367 this=exp.DataType.Type.MAP, 5368 expressions=[key_type, value_type], 5369 nested=True, 5370 prefix=prefix, 5371 ) 5372 5373 nested = type_token in self.NESTED_TYPE_TOKENS 5374 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5375 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5376 expressions = None 5377 maybe_func = False 5378 5379 if self._match(TokenType.L_PAREN): 5380 if is_struct: 5381 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5382 elif nested: 5383 expressions = self._parse_csv( 5384 lambda: self._parse_types( 5385 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5386 ) 5387 ) 5388 if type_token == 
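# A brief sketch of the user-defined type fallback above, assuming Postgres (which sets
# SUPPORTS_USER_DEFINED_TYPES); "mood" is a hypothetical UDT name, not from this module:
import sqlglot
from sqlglot import exp
cast = sqlglot.parse_one("SELECT CAST(x AS mood)", read="postgres").find(exp.Cast)
print(cast.to.this)  # Type.USERDEFINED (expected, per _parse_user_defined_type above)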
TokenType.NULLABLE and len(expressions) == 1: 5389 this = expressions[0] 5390 this.set("nullable", True) 5391 self._match_r_paren() 5392 return this 5393 elif type_token in self.ENUM_TYPE_TOKENS: 5394 expressions = self._parse_csv(self._parse_equality) 5395 elif is_aggregate: 5396 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5397 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5398 ) 5399 if not func_or_ident: 5400 return None 5401 expressions = [func_or_ident] 5402 if self._match(TokenType.COMMA): 5403 expressions.extend( 5404 self._parse_csv( 5405 lambda: self._parse_types( 5406 check_func=check_func, 5407 schema=schema, 5408 allow_identifiers=allow_identifiers, 5409 ) 5410 ) 5411 ) 5412 else: 5413 expressions = self._parse_csv(self._parse_type_size) 5414 5415 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5416 if type_token == TokenType.VECTOR and len(expressions) == 2: 5417 expressions = self._parse_vector_expressions(expressions) 5418 5419 if not self._match(TokenType.R_PAREN): 5420 self._retreat(index) 5421 return None 5422 5423 maybe_func = True 5424 5425 values: t.Optional[t.List[exp.Expression]] = None 5426 5427 if nested and self._match(TokenType.LT): 5428 if is_struct: 5429 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5430 else: 5431 expressions = self._parse_csv( 5432 lambda: self._parse_types( 5433 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5434 ) 5435 ) 5436 5437 if not self._match(TokenType.GT): 5438 self.raise_error("Expecting >") 5439 5440 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5441 values = self._parse_csv(self._parse_assignment) 5442 if not values and is_struct: 5443 values = None 5444 self._retreat(self._index - 1) 5445 else: 5446 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5447 5448 if type_token in self.TIMESTAMPS: 5449 if self._match_text_seq("WITH", "TIME", "ZONE"): 5450 maybe_func = False 5451 tz_type = ( 5452 exp.DataType.Type.TIMETZ 5453 if type_token in self.TIMES 5454 else exp.DataType.Type.TIMESTAMPTZ 5455 ) 5456 this = exp.DataType(this=tz_type, expressions=expressions) 5457 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5458 maybe_func = False 5459 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5460 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5461 maybe_func = False 5462 elif type_token == TokenType.INTERVAL: 5463 unit = self._parse_var(upper=True) 5464 if unit: 5465 if self._match_text_seq("TO"): 5466 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5467 5468 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5469 else: 5470 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5471 elif type_token == TokenType.VOID: 5472 this = exp.DataType(this=exp.DataType.Type.NULL) 5473 5474 if maybe_func and check_func: 5475 index2 = self._index 5476 peek = self._parse_string() 5477 5478 if not peek: 5479 self._retreat(index) 5480 return None 5481 5482 self._retreat(index2) 5483 5484 if not this: 5485 if self._match_text_seq("UNSIGNED"): 5486 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5487 if not unsigned_type_token: 5488 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5489 5490 type_token = unsigned_type_token or type_token 5491 5492 this = exp.DataType( 5493 this=exp.DataType.Type[type_token.value], 5494 
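# A minimal sketch of the WITH TIME ZONE handling above, assuming the default dialect's
# TIMESTAMPTZ spelling (illustrative input):
import sqlglot
print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP WITH TIME ZONE)")[0])
# SELECT CAST(x AS TIMESTAMPTZ)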
expressions=expressions, 5495 nested=nested, 5496 prefix=prefix, 5497 ) 5498 5499 # Empty arrays/structs are allowed 5500 if values is not None: 5501 cls = exp.Struct if is_struct else exp.Array 5502 this = exp.cast(cls(expressions=values), this, copy=False) 5503 5504 elif expressions: 5505 this.set("expressions", expressions) 5506 5507 # https://materialize.com/docs/sql/types/list/#type-name 5508 while self._match(TokenType.LIST): 5509 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5510 5511 index = self._index 5512 5513 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5514 matched_array = self._match(TokenType.ARRAY) 5515 5516 while self._curr: 5517 datatype_token = self._prev.token_type 5518 matched_l_bracket = self._match(TokenType.L_BRACKET) 5519 5520 if (not matched_l_bracket and not matched_array) or ( 5521 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5522 ): 5523 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5524 # not to be confused with the fixed size array parsing 5525 break 5526 5527 matched_array = False 5528 values = self._parse_csv(self._parse_assignment) or None 5529 if ( 5530 values 5531 and not schema 5532 and ( 5533 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5534 ) 5535 ): 5536 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5537 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5538 self._retreat(index) 5539 break 5540 5541 this = exp.DataType( 5542 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5543 ) 5544 self._match(TokenType.R_BRACKET) 5545 5546 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5547 converter = self.TYPE_CONVERTERS.get(this.this) 5548 if converter: 5549 this = converter(t.cast(exp.DataType, this)) 5550 5551 return this 5552 5553 def _parse_vector_expressions( 5554 self, expressions: t.List[exp.Expression] 5555 ) -> t.List[exp.Expression]: 5556 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5557 5558 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5559 index = self._index 5560 5561 if ( 5562 self._curr 5563 and self._next 5564 and self._curr.token_type in self.TYPE_TOKENS 5565 and self._next.token_type in self.TYPE_TOKENS 5566 ): 5567 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5568 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5569 this = self._parse_id_var() 5570 else: 5571 this = ( 5572 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5573 or self._parse_id_var() 5574 ) 5575 5576 self._match(TokenType.COLON) 5577 5578 if ( 5579 type_required 5580 and not isinstance(this, exp.DataType) 5581 and not self._match_set(self.TYPE_TOKENS, advance=False) 5582 ): 5583 self._retreat(index) 5584 return self._parse_types() 5585 5586 return self._parse_column_def(this) 5587 5588 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5589 if not self._match_text_seq("AT", "TIME", "ZONE"): 5590 return this 5591 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5592 5593 def _parse_column(self) -> t.Optional[exp.Expression]: 5594 this = self._parse_column_reference() 5595 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5596 5597 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5598 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5599 5600 return column 5601 5602 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5603 this = self._parse_field() 5604 if ( 5605 not this 5606 and self._match(TokenType.VALUES, advance=False) 5607 and self.VALUES_FOLLOWED_BY_PAREN 5608 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5609 ): 5610 this = self._parse_id_var() 5611 5612 if isinstance(this, exp.Identifier): 5613 # We bubble up comments from the Identifier to the Column 5614 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5615 5616 return this 5617 5618 def _parse_colon_as_variant_extract( 5619 self, this: t.Optional[exp.Expression] 5620 ) -> t.Optional[exp.Expression]: 5621 casts = [] 5622 json_path = [] 5623 escape = None 5624 5625 while self._match(TokenType.COLON): 5626 start_index = self._index 5627 5628 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5629 path = self._parse_column_ops( 5630 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5631 ) 5632 5633 # The cast :: operator has a lower precedence than the extraction operator :, so 5634 # we rearrange the AST appropriately to avoid casting the JSON path 5635 while isinstance(path, exp.Cast): 5636 casts.append(path.to) 5637 path = path.this 5638 5639 if casts: 5640 dcolon_offset = next( 5641 i 5642 for i, t in enumerate(self._tokens[start_index:]) 5643 if t.token_type == TokenType.DCOLON 5644 ) 5645 end_token = self._tokens[start_index + dcolon_offset - 1] 5646 else: 5647 end_token = self._prev 5648 5649 if path: 5650 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5651 # it'll roundtrip to a string literal in GET_PATH 5652 if isinstance(path, exp.Identifier) and path.quoted: 5653 escape = True 5654 5655 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5656 5657 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5658 # Databricks transforms it back to the colon/dot notation 5659 if json_path: 5660 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5661 5662 if json_path_expr: 5663 json_path_expr.set("escape", escape) 5664 5665 this = self.expression( 5666 exp.JSONExtract, 5667 this=this, 5668 expression=json_path_expr, 5669 variant_extract=True, 5670 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5671 ) 5672 5673 while casts: 5674 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5675 5676 return this 5677 5678 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5679 return self._parse_types() 5680 5681 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5682 this = self._parse_bracket(this) 5683 5684 while self._match_set(self.COLUMN_OPERATORS): 5685 op_token = self._prev.token_type 5686 op = self.COLUMN_OPERATORS.get(op_token) 5687 5688 if op_token in self.CAST_COLUMN_OPERATORS: 5689 field = self._parse_dcolon() 5690 if not field: 5691 self.raise_error("Expected type") 5692 elif op and self._curr: 5693 field = self._parse_column_reference() or self._parse_bracket() 5694 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5695 field = self._parse_column_ops(field) 5696 else: 5697 field = self._parse_field(any_token=True, anonymous_func=True) 5698 5699 # Function calls can be qualified, e.g., x.y.FOO() 5700 # This converts the final AST to a series of Dots leading to the function call 5701 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5702 if isinstance(field, (exp.Func, exp.Window)) and this: 5703 this = this.transform( 5704 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5705 ) 5706 5707 if op: 5708 this = op(self, this, field) 5709 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5710 this = self.expression( 5711 exp.Column, 5712 comments=this.comments, 5713 this=field, 5714 table=this.this, 5715 db=this.args.get("table"), 5716 catalog=this.args.get("db"), 5717 ) 5718 elif isinstance(field, exp.Window): 5719 # Move the exp.Dot's to the window's function 5720 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5721 field.set("this", window_func) 5722 this = field 5723 else: 5724 this = self.expression(exp.Dot, this=this, expression=field) 5725 5726 if field and field.comments: 5727 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5728 5729 this = self._parse_bracket(this) 5730 5731 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5732 5733 def _parse_paren(self) -> t.Optional[exp.Expression]: 5734 if not self._match(TokenType.L_PAREN): 5735 return None 5736 5737 comments = self._prev_comments 5738 query = self._parse_select() 5739 5740 if query: 5741 expressions = [query] 5742 else: 5743 expressions = self._parse_expressions() 5744 5745 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5746 5747 if not this and self._match(TokenType.R_PAREN, advance=False): 5748 this = self.expression(exp.Tuple) 5749 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5750 this = self._parse_subquery(this=this, parse_alias=False) 5751 elif isinstance(this, exp.Subquery): 5752 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5753 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5754 this = self.expression(exp.Tuple, expressions=expressions) 5755 else: 5756 this = self.expression(exp.Paren, this=this) 5757 5758 if this: 5759 this.add_comments(comments) 5760 5761 self._match_r_paren(expression=this) 5762 return this 5763 5764 def _parse_primary(self) -> t.Optional[exp.Expression]: 5765 if self._match_set(self.PRIMARY_PARSERS): 5766 token_type = self._prev.token_type 5767 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5768 5769 if token_type == TokenType.STRING: 5770 expressions = [primary] 5771 while self._match(TokenType.STRING): 5772 expressions.append(exp.Literal.string(self._prev.text)) 5773 5774 if len(expressions) > 1: 5775 return self.expression(exp.Concat, expressions=expressions) 5776 5777 return primary 5778 5779 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5780 return exp.Literal.number(f"0.{self._prev.text}") 5781 5782 return self._parse_paren() 5783 5784 def _parse_field( 5785 self, 5786 any_token: bool = False, 5787 tokens: t.Optional[t.Collection[TokenType]] = None, 5788 anonymous_func: bool = False, 5789 ) -> t.Optional[exp.Expression]: 5790 if anonymous_func: 5791 field = ( 5792 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5793 or self._parse_primary() 5794 ) 5795 else: 5796 field = self._parse_primary() or self._parse_function( 5797 anonymous=anonymous_func, any_token=any_token 5798 ) 5799 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5800 5801 def _parse_function( 5802 self, 5803 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5804 anonymous: bool = False, 5805 optional_parens: bool = True, 5806 any_token: bool = False, 5807 ) -> t.Optional[exp.Expression]: 5808 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5809 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5810 fn_syntax = False 5811 if ( 5812 self._match(TokenType.L_BRACE, advance=False) 5813 and self._next 5814 and self._next.text.upper() == "FN" 5815 ): 5816 self._advance(2) 5817 fn_syntax = True 5818 5819 func = self._parse_function_call( 5820 functions=functions, 5821 anonymous=anonymous, 5822 optional_parens=optional_parens, 5823 any_token=any_token, 5824 ) 5825 5826 if fn_syntax: 5827 self._match(TokenType.R_BRACE) 5828 5829 return func 5830 5831 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5832 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5833 5834 def _parse_function_call( 5835 self, 5836 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5837 anonymous: bool = False, 5838 optional_parens: bool = True, 5839 any_token: bool = False, 5840 ) -> t.Optional[exp.Expression]: 5841 if not self._curr: 5842 return None 5843 5844 comments = self._curr.comments 5845 prev = self._prev 5846 token = self._curr 5847 token_type = self._curr.token_type 5848 this = self._curr.text 5849 upper = this.upper() 5850 5851 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5852 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5853 self._advance() 5854 return self._parse_window(parser(self)) 5855 5856 if not self._next or self._next.token_type != TokenType.L_PAREN: 5857 if optional_parens and 
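# A small sketch of the {fn <function>} escape handling above: the wrapper is consumed
# during parsing and is not stored, so it is dropped on output (assumes the default dialect):
import sqlglot
print(sqlglot.transpile("SELECT {fn CONCAT('a', 'b')}")[0])  # SELECT CONCAT('a', 'b')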
token_type in self.NO_PAREN_FUNCTIONS: 5858 self._advance() 5859 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5860 5861 return None 5862 5863 if any_token: 5864 if token_type in self.RESERVED_TOKENS: 5865 return None 5866 elif token_type not in self.FUNC_TOKENS: 5867 return None 5868 5869 self._advance(2) 5870 5871 parser = self.FUNCTION_PARSERS.get(upper) 5872 if parser and not anonymous: 5873 this = parser(self) 5874 else: 5875 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5876 5877 if subquery_predicate: 5878 expr = None 5879 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5880 expr = self._parse_select() 5881 self._match_r_paren() 5882 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5883 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5884 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5885 self._advance(-1) 5886 expr = self._parse_bitwise() 5887 5888 if expr: 5889 return self.expression(subquery_predicate, comments=comments, this=expr) 5890 5891 if functions is None: 5892 functions = self.FUNCTIONS 5893 5894 function = functions.get(upper) 5895 known_function = function and not anonymous 5896 5897 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5898 args = self._parse_function_args(alias) 5899 5900 post_func_comments = self._curr and self._curr.comments 5901 if known_function and post_func_comments: 5902 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5903 # call we'll construct it as exp.Anonymous, even if it's "known" 5904 if any( 5905 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5906 for comment in post_func_comments 5907 ): 5908 known_function = False 5909 5910 if alias and known_function: 5911 args = self._kv_to_prop_eq(args) 5912 5913 if known_function: 5914 func_builder = t.cast(t.Callable, function) 5915 5916 if "dialect" in func_builder.__code__.co_varnames: 5917 func = func_builder(args, dialect=self.dialect) 5918 else: 5919 func = func_builder(args) 5920 5921 func = self.validate_expression(func, args) 5922 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5923 func.meta["name"] = this 5924 5925 this = func 5926 else: 5927 if token_type == TokenType.IDENTIFIER: 5928 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5929 5930 this = self.expression(exp.Anonymous, this=this, expressions=args) 5931 this = this.update_positions(token) 5932 5933 if isinstance(this, exp.Expression): 5934 this.add_comments(comments) 5935 5936 self._match_r_paren(this) 5937 return self._parse_window(this) 5938 5939 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5940 return expression 5941 5942 def _kv_to_prop_eq( 5943 self, expressions: t.List[exp.Expression], parse_map: bool = False 5944 ) -> t.List[exp.Expression]: 5945 transformed = [] 5946 5947 for index, e in enumerate(expressions): 5948 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5949 if isinstance(e, exp.Alias): 5950 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5951 5952 if not isinstance(e, exp.PropertyEQ): 5953 e = self.expression( 5954 exp.PropertyEQ, 5955 this=e.this if parse_map else exp.to_identifier(e.this.name), 5956 expression=e.expression, 5957 ) 5958 5959 if isinstance(e.this, exp.Column): 5960 e.this.replace(e.this.this) 5961 else: 5962 e = self._to_prop_eq(e, index) 5963 5964 transformed.append(e) 5965 5966 return transformed 5967 5968 def 
_parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5969 return self._parse_statement() 5970 5971 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5972 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5973 5974 def _parse_user_defined_function( 5975 self, kind: t.Optional[TokenType] = None 5976 ) -> t.Optional[exp.Expression]: 5977 this = self._parse_table_parts(schema=True) 5978 5979 if not self._match(TokenType.L_PAREN): 5980 return this 5981 5982 expressions = self._parse_csv(self._parse_function_parameter) 5983 self._match_r_paren() 5984 return self.expression( 5985 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5986 ) 5987 5988 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5989 literal = self._parse_primary() 5990 if literal: 5991 return self.expression(exp.Introducer, this=token.text, expression=literal) 5992 5993 return self._identifier_expression(token) 5994 5995 def _parse_session_parameter(self) -> exp.SessionParameter: 5996 kind = None 5997 this = self._parse_id_var() or self._parse_primary() 5998 5999 if this and self._match(TokenType.DOT): 6000 kind = this.name 6001 this = self._parse_var() or self._parse_primary() 6002 6003 return self.expression(exp.SessionParameter, this=this, kind=kind) 6004 6005 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6006 return self._parse_id_var() 6007 6008 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6009 index = self._index 6010 6011 if self._match(TokenType.L_PAREN): 6012 expressions = t.cast( 6013 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6014 ) 6015 6016 if not self._match(TokenType.R_PAREN): 6017 self._retreat(index) 6018 else: 6019 expressions = [self._parse_lambda_arg()] 6020 6021 if self._match_set(self.LAMBDAS): 6022 return self.LAMBDAS[self._prev.token_type](self, expressions) 6023 6024 self._retreat(index) 6025 6026 this: t.Optional[exp.Expression] 6027 6028 if self._match(TokenType.DISTINCT): 6029 this = self.expression( 6030 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6031 ) 6032 else: 6033 this = self._parse_select_or_expression(alias=alias) 6034 6035 return self._parse_limit( 6036 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6037 ) 6038 6039 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6040 index = self._index 6041 if not self._match(TokenType.L_PAREN): 6042 return this 6043 6044 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6045 # expr can be of both types 6046 if self._match_set(self.SELECT_START_TOKENS): 6047 self._retreat(index) 6048 return this 6049 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6050 self._match_r_paren() 6051 return self.expression(exp.Schema, this=this, expressions=args) 6052 6053 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6054 return self._parse_column_def(self._parse_field(any_token=True)) 6055 6056 def _parse_column_def( 6057 self, this: t.Optional[exp.Expression], computed_column: bool = True 6058 ) -> t.Optional[exp.Expression]: 6059 # column defs are not really columns, they're identifiers 6060 if isinstance(this, exp.Column): 6061 this = this.this 6062 6063 if not computed_column: 6064 self._match(TokenType.ALIAS) 6065 6066 kind = self._parse_types(schema=True) 6067 6068 if self._match_text_seq("FOR", "ORDINALITY"): 6069 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6070 6071 constraints: t.List[exp.Expression] = [] 6072 6073 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6074 ("ALIAS", "MATERIALIZED") 6075 ): 6076 persisted = self._prev.text.upper() == "MATERIALIZED" 6077 constraint_kind = exp.ComputedColumnConstraint( 6078 this=self._parse_assignment(), 6079 persisted=persisted or self._match_text_seq("PERSISTED"), 6080 data_type=exp.Var(this="AUTO") 6081 if self._match_text_seq("AUTO") 6082 else self._parse_types(), 6083 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6084 ) 6085 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6086 elif ( 6087 kind 6088 and self._match(TokenType.ALIAS, advance=False) 6089 and ( 6090 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6091 or (self._next and self._next.token_type == TokenType.L_PAREN) 6092 ) 6093 ): 6094 self._advance() 6095 constraints.append( 6096 self.expression( 6097 exp.ColumnConstraint, 6098 kind=exp.ComputedColumnConstraint( 6099 this=self._parse_disjunction(), 6100 persisted=self._match_texts(("STORED", "VIRTUAL")) 6101 and self._prev.text.upper() == "STORED", 6102 ), 6103 ) 6104 ) 6105 6106 while True: 6107 constraint = self._parse_column_constraint() 6108 if not constraint: 6109 break 6110 constraints.append(constraint) 6111 6112 if not kind and not constraints: 6113 return this 6114 6115 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6116 6117 def _parse_auto_increment( 6118 self, 6119 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6120 start = None 6121 increment = None 6122 order = None 6123 6124 if self._match(TokenType.L_PAREN, advance=False): 6125 args = self._parse_wrapped_csv(self._parse_bitwise) 6126 start = seq_get(args, 0) 6127 increment = seq_get(args, 1) 6128 elif self._match_text_seq("START"): 6129 start = self._parse_bitwise() 6130 self._match_text_seq("INCREMENT") 6131 increment = self._parse_bitwise() 6132 if self._match_text_seq("ORDER"): 6133 order = True 6134 elif self._match_text_seq("NOORDER"): 6135 order = False 6136 6137 if start and increment: 6138 return exp.GeneratedAsIdentityColumnConstraint( 6139 start=start, increment=increment, this=False, order=order 6140 ) 6141 6142 return exp.AutoIncrementColumnConstraint() 6143 6144 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6145 if not self._match_text_seq("REFRESH"): 6146 self._retreat(self._index - 1) 6147 return None 6148 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6149 6150 def _parse_compress(self) -> exp.CompressColumnConstraint: 6151 if self._match(TokenType.L_PAREN, advance=False): 6152 return self.expression( 6153 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6154 ) 6155 6156 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6157 6158 def _parse_generated_as_identity( 6159 self, 6160 ) -> ( 6161 exp.GeneratedAsIdentityColumnConstraint 6162 | exp.ComputedColumnConstraint 6163 | exp.GeneratedAsRowColumnConstraint 6164 ): 6165 if self._match_text_seq("BY", "DEFAULT"): 6166 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6167 this = self.expression( 6168 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6169 ) 6170 else: 6171 self._match_text_seq("ALWAYS") 6172 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6173 6174 self._match(TokenType.ALIAS) 6175 6176 if self._match_text_seq("ROW"): 6177 start = self._match_text_seq("START") 6178 if not start: 6179 self._match(TokenType.END) 6180 hidden = self._match_text_seq("HIDDEN") 6181 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6182 6183 identity = self._match_text_seq("IDENTITY") 6184 6185 if self._match(TokenType.L_PAREN): 6186 if self._match(TokenType.START_WITH): 6187 this.set("start", self._parse_bitwise()) 6188 if self._match_text_seq("INCREMENT", "BY"): 6189 this.set("increment", self._parse_bitwise()) 6190 if self._match_text_seq("MINVALUE"): 6191 this.set("minvalue", self._parse_bitwise()) 6192 if self._match_text_seq("MAXVALUE"): 6193 this.set("maxvalue", self._parse_bitwise()) 6194 6195 if self._match_text_seq("CYCLE"): 6196 this.set("cycle", True) 6197 elif self._match_text_seq("NO", "CYCLE"): 6198 this.set("cycle", False) 6199 6200 if not identity: 6201 this.set("expression", self._parse_range()) 6202 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6203 args = self._parse_csv(self._parse_bitwise) 6204 this.set("start", seq_get(args, 0)) 6205 this.set("increment", seq_get(args, 1)) 6206 6207 self._match_r_paren() 6208 6209 return this 6210 6211 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6212 self._match_text_seq("LENGTH") 6213 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6214 6215 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6216 if self._match_text_seq("NULL"): 6217 return self.expression(exp.NotNullColumnConstraint) 6218 if self._match_text_seq("CASESPECIFIC"): 6219 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6220 if self._match_text_seq("FOR", "REPLICATION"): 6221 return self.expression(exp.NotForReplicationColumnConstraint) 6222 6223 # Unconsume the `NOT` token 6224 self._retreat(self._index - 1) 6225 return None 6226 6227 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6228 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6229 6230 procedure_option_follows = ( 6231 self._match(TokenType.WITH, advance=False) 6232 and self._next 6233 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6234 ) 6235 6236 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6237 return self.expression( 6238 exp.ColumnConstraint, 6239 this=this, 6240 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6241 ) 6242 6243 return this 6244 6245 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6246 if not 
self._match(TokenType.CONSTRAINT): 6247 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6248 6249 return self.expression( 6250 exp.Constraint, 6251 this=self._parse_id_var(), 6252 expressions=self._parse_unnamed_constraints(), 6253 ) 6254 6255 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6256 constraints = [] 6257 while True: 6258 constraint = self._parse_unnamed_constraint() or self._parse_function() 6259 if not constraint: 6260 break 6261 constraints.append(constraint) 6262 6263 return constraints 6264 6265 def _parse_unnamed_constraint( 6266 self, constraints: t.Optional[t.Collection[str]] = None 6267 ) -> t.Optional[exp.Expression]: 6268 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6269 constraints or self.CONSTRAINT_PARSERS 6270 ): 6271 return None 6272 6273 constraint = self._prev.text.upper() 6274 if constraint not in self.CONSTRAINT_PARSERS: 6275 self.raise_error(f"No parser found for schema constraint {constraint}.") 6276 6277 return self.CONSTRAINT_PARSERS[constraint](self) 6278 6279 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6280 return self._parse_id_var(any_token=False) 6281 6282 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6283 self._match_texts(("KEY", "INDEX")) 6284 return self.expression( 6285 exp.UniqueColumnConstraint, 6286 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6287 this=self._parse_schema(self._parse_unique_key()), 6288 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6289 on_conflict=self._parse_on_conflict(), 6290 options=self._parse_key_constraint_options(), 6291 ) 6292 6293 def _parse_key_constraint_options(self) -> t.List[str]: 6294 options = [] 6295 while True: 6296 if not self._curr: 6297 break 6298 6299 if self._match(TokenType.ON): 6300 action = None 6301 on = self._advance_any() and self._prev.text 6302 6303 if self._match_text_seq("NO", "ACTION"): 6304 action = "NO ACTION" 6305 elif self._match_text_seq("CASCADE"): 6306 action = "CASCADE" 6307 elif self._match_text_seq("RESTRICT"): 6308 action = "RESTRICT" 6309 elif self._match_pair(TokenType.SET, TokenType.NULL): 6310 action = "SET NULL" 6311 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6312 action = "SET DEFAULT" 6313 else: 6314 self.raise_error("Invalid key constraint") 6315 6316 options.append(f"ON {on} {action}") 6317 else: 6318 var = self._parse_var_from_options( 6319 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6320 ) 6321 if not var: 6322 break 6323 options.append(var.name) 6324 6325 return options 6326 6327 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6328 if match and not self._match(TokenType.REFERENCES): 6329 return None 6330 6331 expressions = None 6332 this = self._parse_table(schema=True) 6333 options = self._parse_key_constraint_options() 6334 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6335 6336 def _parse_foreign_key(self) -> exp.ForeignKey: 6337 expressions = ( 6338 self._parse_wrapped_id_vars() 6339 if not self._match(TokenType.REFERENCES, advance=False) 6340 else None 6341 ) 6342 reference = self._parse_references() 6343 on_options = {} 6344 6345 while self._match(TokenType.ON): 6346 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6347 self.raise_error("Expected DELETE or UPDATE") 6348 6349 kind = self._prev.text.lower() 6350 6351 if self._match_text_seq("NO", "ACTION"): 6352 action = "NO ACTION" 6353 elif 
self._match(TokenType.SET): 6354 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6355 action = "SET " + self._prev.text.upper() 6356 else: 6357 self._advance() 6358 action = self._prev.text.upper() 6359 6360 on_options[kind] = action 6361 6362 return self.expression( 6363 exp.ForeignKey, 6364 expressions=expressions, 6365 reference=reference, 6366 options=self._parse_key_constraint_options(), 6367 **on_options, # type: ignore 6368 ) 6369 6370 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6371 return self._parse_ordered() or self._parse_field() 6372 6373 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6374 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6375 self._retreat(self._index - 1) 6376 return None 6377 6378 id_vars = self._parse_wrapped_id_vars() 6379 return self.expression( 6380 exp.PeriodForSystemTimeConstraint, 6381 this=seq_get(id_vars, 0), 6382 expression=seq_get(id_vars, 1), 6383 ) 6384 6385 def _parse_primary_key( 6386 self, wrapped_optional: bool = False, in_props: bool = False 6387 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6388 desc = ( 6389 self._match_set((TokenType.ASC, TokenType.DESC)) 6390 and self._prev.token_type == TokenType.DESC 6391 ) 6392 6393 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6394 return self.expression( 6395 exp.PrimaryKeyColumnConstraint, 6396 desc=desc, 6397 options=self._parse_key_constraint_options(), 6398 ) 6399 6400 expressions = self._parse_wrapped_csv( 6401 self._parse_primary_key_part, optional=wrapped_optional 6402 ) 6403 6404 return self.expression( 6405 exp.PrimaryKey, 6406 expressions=expressions, 6407 include=self._parse_index_params(), 6408 options=self._parse_key_constraint_options(), 6409 ) 6410 6411 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6412 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6413 6414 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6415 """ 6416 Parses a datetime literal in ODBC format. The literal is parsed into the corresponding 6417 expression type, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` node, exactly 6418 as if `DATE('yyyy-mm-dd')` had been written.
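Example (illustrative; assumes the base parser maps `d` to `exp.Date` in ODBC_DATETIME_LITERALS):

>>> import sqlglot
>>> from sqlglot import exp
>>> isinstance(sqlglot.parse_one("SELECT {d '2024-01-01'}").selects[0], exp.Date)
True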
6419 6420 Reference: 6421 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6422 """ 6423 self._match(TokenType.VAR) 6424 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6425 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6426 if not self._match(TokenType.R_BRACE): 6427 self.raise_error("Expected }") 6428 return expression 6429 6430 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6431 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6432 return this 6433 6434 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6435 map_token = seq_get(self._tokens, self._index - 2) 6436 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6437 else: 6438 parse_map = False 6439 6440 bracket_kind = self._prev.token_type 6441 if ( 6442 bracket_kind == TokenType.L_BRACE 6443 and self._curr 6444 and self._curr.token_type == TokenType.VAR 6445 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6446 ): 6447 return self._parse_odbc_datetime_literal() 6448 6449 expressions = self._parse_csv( 6450 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6451 ) 6452 6453 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6454 self.raise_error("Expected ]") 6455 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6456 self.raise_error("Expected }") 6457 6458 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6459 if bracket_kind == TokenType.L_BRACE: 6460 this = self.expression( 6461 exp.Struct, 6462 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6463 ) 6464 elif not this: 6465 this = build_array_constructor( 6466 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6467 ) 6468 else: 6469 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6470 if constructor_type: 6471 return build_array_constructor( 6472 constructor_type, 6473 args=expressions, 6474 bracket_kind=bracket_kind, 6475 dialect=self.dialect, 6476 ) 6477 6478 expressions = apply_index_offset( 6479 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6480 ) 6481 this = self.expression( 6482 exp.Bracket, 6483 this=this, 6484 expressions=expressions, 6485 comments=this.pop_comments(), 6486 ) 6487 6488 self._add_comments(this) 6489 return self._parse_bracket(this) 6490 6491 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6492 if self._match(TokenType.COLON): 6493 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6494 return this 6495 6496 def _parse_case(self) -> t.Optional[exp.Expression]: 6497 if self._match(TokenType.DOT, advance=False): 6498 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6499 self._retreat(self._index - 1) 6500 return None 6501 6502 ifs = [] 6503 default = None 6504 6505 comments = self._prev_comments 6506 expression = self._parse_assignment() 6507 6508 while self._match(TokenType.WHEN): 6509 this = self._parse_assignment() 6510 self._match(TokenType.THEN) 6511 then = self._parse_assignment() 6512 ifs.append(self.expression(exp.If, this=this, true=then)) 6513 6514 if self._match(TokenType.ELSE): 6515 default = self._parse_assignment() 6516 6517 if not self._match(TokenType.END): 6518 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6519 default 
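# An illustrative sketch of the brace-struct parsing above, assuming the DuckDB dialect
# referenced by the duckdb.org link (the literal is an example input):
import sqlglot
print(sqlglot.transpile("SELECT {'a': 1, 'b': 2}", read="duckdb", write="duckdb")[0])
# SELECT {'a': 1, 'b': 2}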
= exp.column("interval") 6520 else: 6521 self.raise_error("Expected END after CASE", self._prev) 6522 6523 return self.expression( 6524 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6525 ) 6526 6527 def _parse_if(self) -> t.Optional[exp.Expression]: 6528 if self._match(TokenType.L_PAREN): 6529 args = self._parse_csv( 6530 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6531 ) 6532 this = self.validate_expression(exp.If.from_arg_list(args), args) 6533 self._match_r_paren() 6534 else: 6535 index = self._index - 1 6536 6537 if self.NO_PAREN_IF_COMMANDS and index == 0: 6538 return self._parse_as_command(self._prev) 6539 6540 condition = self._parse_assignment() 6541 6542 if not condition: 6543 self._retreat(index) 6544 return None 6545 6546 self._match(TokenType.THEN) 6547 true = self._parse_assignment() 6548 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6549 self._match(TokenType.END) 6550 this = self.expression(exp.If, this=condition, true=true, false=false) 6551 6552 return this 6553 6554 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6555 if not self._match_text_seq("VALUE", "FOR"): 6556 self._retreat(self._index - 1) 6557 return None 6558 6559 return self.expression( 6560 exp.NextValueFor, 6561 this=self._parse_column(), 6562 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6563 ) 6564 6565 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6566 this = self._parse_function() or self._parse_var_or_string(upper=True) 6567 6568 if self._match(TokenType.FROM): 6569 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6570 6571 if not self._match(TokenType.COMMA): 6572 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6573 6574 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6575 6576 def _parse_gap_fill(self) -> exp.GapFill: 6577 self._match(TokenType.TABLE) 6578 this = self._parse_table() 6579 6580 self._match(TokenType.COMMA) 6581 args = [this, *self._parse_csv(self._parse_lambda)] 6582 6583 gap_fill = exp.GapFill.from_arg_list(args) 6584 return self.validate_expression(gap_fill, args) 6585 6586 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6587 this = self._parse_assignment() 6588 6589 if not self._match(TokenType.ALIAS): 6590 if self._match(TokenType.COMMA): 6591 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6592 6593 self.raise_error("Expected AS after CAST") 6594 6595 fmt = None 6596 to = self._parse_types() 6597 6598 default = self._match(TokenType.DEFAULT) 6599 if default: 6600 default = self._parse_bitwise() 6601 self._match_text_seq("ON", "CONVERSION", "ERROR") 6602 6603 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6604 fmt_string = self._parse_string() 6605 fmt = self._parse_at_time_zone(fmt_string) 6606 6607 if not to: 6608 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6609 if to.this in exp.DataType.TEMPORAL_TYPES: 6610 this = self.expression( 6611 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6612 this=this, 6613 format=exp.Literal.string( 6614 format_time( 6615 fmt_string.this if fmt_string else "", 6616 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6617 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6618 ) 6619 ), 6620 safe=safe, 6621 ) 6622 6623 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6624 this.set("zone", 
fmt.args["zone"]) 6625 return this 6626 elif not to: 6627 self.raise_error("Expected TYPE after CAST") 6628 elif isinstance(to, exp.Identifier): 6629 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6630 elif to.this == exp.DataType.Type.CHAR: 6631 if self._match(TokenType.CHARACTER_SET): 6632 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6633 6634 return self.build_cast( 6635 strict=strict, 6636 this=this, 6637 to=to, 6638 format=fmt, 6639 safe=safe, 6640 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6641 default=default, 6642 ) 6643 6644 def _parse_string_agg(self) -> exp.GroupConcat: 6645 if self._match(TokenType.DISTINCT): 6646 args: t.List[t.Optional[exp.Expression]] = [ 6647 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6648 ] 6649 if self._match(TokenType.COMMA): 6650 args.extend(self._parse_csv(self._parse_assignment)) 6651 else: 6652 args = self._parse_csv(self._parse_assignment) # type: ignore 6653 6654 if self._match_text_seq("ON", "OVERFLOW"): 6655 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6656 if self._match_text_seq("ERROR"): 6657 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6658 else: 6659 self._match_text_seq("TRUNCATE") 6660 on_overflow = self.expression( 6661 exp.OverflowTruncateBehavior, 6662 this=self._parse_string(), 6663 with_count=( 6664 self._match_text_seq("WITH", "COUNT") 6665 or not self._match_text_seq("WITHOUT", "COUNT") 6666 ), 6667 ) 6668 else: 6669 on_overflow = None 6670 6671 index = self._index 6672 if not self._match(TokenType.R_PAREN) and args: 6673 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6674 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6675 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6676 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6677 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6678 6679 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6680 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6681 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6682 if not self._match_text_seq("WITHIN", "GROUP"): 6683 self._retreat(index) 6684 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6685 6686 # The corresponding match_r_paren will be called in parse_function (caller) 6687 self._match_l_paren() 6688 6689 return self.expression( 6690 exp.GroupConcat, 6691 this=self._parse_order(this=seq_get(args, 0)), 6692 separator=seq_get(args, 1), 6693 on_overflow=on_overflow, 6694 ) 6695 6696 def _parse_convert( 6697 self, strict: bool, safe: t.Optional[bool] = None 6698 ) -> t.Optional[exp.Expression]: 6699 this = self._parse_bitwise() 6700 6701 if self._match(TokenType.USING): 6702 to: t.Optional[exp.Expression] = self.expression( 6703 exp.CharacterSet, this=self._parse_var() 6704 ) 6705 elif self._match(TokenType.COMMA): 6706 to = self._parse_types() 6707 else: 6708 to = None 6709 6710 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6711 6712 def _parse_xml_table(self) -> exp.XMLTable: 6713 namespaces = None 6714 passing = None 6715 columns = None 6716 6717 if self._match_text_seq("XMLNAMESPACES", "("): 6718 namespaces = self._parse_xml_namespace() 6719 self._match_text_seq(")", ",") 6720 6721 this = self._parse_string() 6722 6723 if self._match_text_seq("PASSING"): 6724 # The BY VALUE keywords are optional and are provided for semantic clarity 6725 self._match_text_seq("BY", "VALUE") 6726 passing = self._parse_csv(self._parse_column) 6727 6728 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6729 6730 if self._match_text_seq("COLUMNS"): 6731 columns = self._parse_csv(self._parse_field_def) 6732 6733 return self.expression( 6734 exp.XMLTable, 6735 this=this, 6736 namespaces=namespaces, 6737 passing=passing, 6738 columns=columns, 6739 by_ref=by_ref, 6740 ) 6741 6742 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6743 namespaces = [] 6744 6745 while True: 6746 if self._match(TokenType.DEFAULT): 6747 uri = self._parse_string() 6748 else: 6749 uri = self._parse_alias(self._parse_string()) 6750 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6751 if not self._match(TokenType.COMMA): 6752 break 6753 6754 return namespaces 6755 6756 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6757 args = self._parse_csv(self._parse_assignment) 6758 6759 if len(args) < 3: 6760 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6761 6762 return self.expression(exp.DecodeCase, expressions=args) 6763 6764 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6765 self._match_text_seq("KEY") 6766 key = self._parse_column() 6767 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6768 self._match_text_seq("VALUE") 6769 value = self._parse_bitwise() 6770 6771 if not key and not value: 6772 return None 6773 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6774 6775 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6776 if not this or not self._match_text_seq("FORMAT", "JSON"): 6777 return this 6778 6779 return self.expression(exp.FormatJson, this=this) 6780 6781 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6782 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6783 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6784 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6785 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6786 else: 6787 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6788 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6789 6790 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6791 6792 if not empty and not error and not null: 6793 return None 6794 6795 return self.expression( 6796 exp.OnCondition, 6797 empty=empty, 6798 error=error, 6799 null=null, 6800 ) 6801 6802 def _parse_on_handling( 6803 self, on: str, *values: str 6804 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6805 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6806 for value in values: 6807 if self._match_text_seq(value, "ON", on): 6808 return f"{value} ON {on}" 6809 6810 index = self._index 6811 if self._match(TokenType.DEFAULT): 6812 default_value = self._parse_bitwise() 6813 if self._match_text_seq("ON", on): 6814 return default_value 6815 6816 self._retreat(index) 6817 6818 return None 6819 6820 @t.overload 6821 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6822 6823 @t.overload 6824 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6825 6826 def _parse_json_object(self, agg=False): 6827 star = self._parse_star() 6828 expressions = ( 6829 [star] 6830 if star 6831 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6832 ) 6833 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6834 6835 unique_keys = None 6836 if self._match_text_seq("WITH", "UNIQUE"): 6837 unique_keys = True 6838 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6839 unique_keys = False 6840 6841 self._match_text_seq("KEYS") 6842 6843 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6844 self._parse_type() 6845 ) 6846 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6847 6848 return self.expression( 6849 exp.JSONObjectAgg if agg else exp.JSONObject, 6850 expressions=expressions, 6851 null_handling=null_handling, 6852 unique_keys=unique_keys, 6853 return_type=return_type, 6854 encoding=encoding, 6855 ) 6856 6857 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6858 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6859 if not self._match_text_seq("NESTED"): 6860 this = self._parse_id_var() 6861 kind = self._parse_types(allow_identifiers=False) 6862 nested = None 6863 else: 6864 this = None 6865 kind = None 6866 nested = True 6867 6868 path = self._match_text_seq("PATH") and self._parse_string() 6869 nested_schema = nested and self._parse_json_schema() 6870 6871 return self.expression( 6872 exp.JSONColumnDef, 6873 this=this, 6874 kind=kind, 6875 path=path, 6876 nested_schema=nested_schema, 6877 ) 6878 6879 def _parse_json_schema(self) -> exp.JSONSchema: 6880 self._match_text_seq("COLUMNS") 6881 return self.expression( 6882 exp.JSONSchema, 6883 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6884 ) 6885 6886 def _parse_json_table(self) -> exp.JSONTable: 6887 this = self._parse_format_json(self._parse_bitwise()) 6888 path = self._match(TokenType.COMMA) and self._parse_string() 6889 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6890 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6891 schema =
self._parse_json_schema() 6892 6893 return exp.JSONTable( 6894 this=this, 6895 schema=schema, 6896 path=path, 6897 error_handling=error_handling, 6898 empty_handling=empty_handling, 6899 ) 6900 6901 def _parse_match_against(self) -> exp.MatchAgainst: 6902 if self._match_text_seq("TABLE"): 6903 # parse SingleStore MATCH(TABLE ...) syntax 6904 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6905 expressions = [] 6906 table = self._parse_table() 6907 if table: 6908 expressions = [table] 6909 else: 6910 expressions = self._parse_csv(self._parse_column) 6911 6912 self._match_text_seq(")", "AGAINST", "(") 6913 6914 this = self._parse_string() 6915 6916 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6917 modifier = "IN NATURAL LANGUAGE MODE" 6918 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6919 modifier = f"{modifier} WITH QUERY EXPANSION" 6920 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6921 modifier = "IN BOOLEAN MODE" 6922 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6923 modifier = "WITH QUERY EXPANSION" 6924 else: 6925 modifier = None 6926 6927 return self.expression( 6928 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6929 ) 6930 6931 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6932 def _parse_open_json(self) -> exp.OpenJSON: 6933 this = self._parse_bitwise() 6934 path = self._match(TokenType.COMMA) and self._parse_string() 6935 6936 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6937 this = self._parse_field(any_token=True) 6938 kind = self._parse_types() 6939 path = self._parse_string() 6940 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6941 6942 return self.expression( 6943 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6944 ) 6945 6946 expressions = None 6947 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6948 self._match_l_paren() 6949 expressions = self._parse_csv(_parse_open_json_column_def) 6950 6951 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6952 6953 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6954 args = self._parse_csv(self._parse_bitwise) 6955 6956 if self._match(TokenType.IN): 6957 return self.expression( 6958 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6959 ) 6960 6961 if haystack_first: 6962 haystack = seq_get(args, 0) 6963 needle = seq_get(args, 1) 6964 else: 6965 haystack = seq_get(args, 1) 6966 needle = seq_get(args, 0) 6967 6968 return self.expression( 6969 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6970 ) 6971 6972 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6973 args = self._parse_csv(self._parse_table) 6974 return exp.JoinHint(this=func_name.upper(), expressions=args) 6975 6976 def _parse_substring(self) -> exp.Substring: 6977 # Postgres supports the form: substring(string [from int] [for int]) 6978 # (despite being undocumented, the reverse order also works) 6979 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6980 6981 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6982 6983 start, length = None, None 6984 6985 while self._curr: 6986 if self._match(TokenType.FROM): 6987 start = self._parse_bitwise() 6988 elif self._match(TokenType.FOR): 6989 if not start: 6990 start = exp.Literal.number(1) 6991 length = self._parse_bitwise() 6992 
else: 6993 break 6994 6995 if start: 6996 args.append(start) 6997 if length: 6998 args.append(length) 6999 7000 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7001 7002 def _parse_trim(self) -> exp.Trim: 7003 # https://www.w3resource.com/sql/character-functions/trim.php 7004 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7005 7006 position = None 7007 collation = None 7008 expression = None 7009 7010 if self._match_texts(self.TRIM_TYPES): 7011 position = self._prev.text.upper() 7012 7013 this = self._parse_bitwise() 7014 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7015 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7016 expression = self._parse_bitwise() 7017 7018 if invert_order: 7019 this, expression = expression, this 7020 7021 if self._match(TokenType.COLLATE): 7022 collation = self._parse_bitwise() 7023 7024 return self.expression( 7025 exp.Trim, this=this, position=position, expression=expression, collation=collation 7026 ) 7027 7028 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7029 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7030 7031 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7032 return self._parse_window(self._parse_id_var(), alias=True) 7033 7034 def _parse_respect_or_ignore_nulls( 7035 self, this: t.Optional[exp.Expression] 7036 ) -> t.Optional[exp.Expression]: 7037 if self._match_text_seq("IGNORE", "NULLS"): 7038 return self.expression(exp.IgnoreNulls, this=this) 7039 if self._match_text_seq("RESPECT", "NULLS"): 7040 return self.expression(exp.RespectNulls, this=this) 7041 return this 7042 7043 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7044 if self._match(TokenType.HAVING): 7045 self._match_texts(("MAX", "MIN")) 7046 max = self._prev.text.upper() != "MIN" 7047 return self.expression( 7048 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7049 ) 7050 7051 return this 7052 7053 def _parse_window( 7054 self, this: t.Optional[exp.Expression], alias: bool = False 7055 ) -> t.Optional[exp.Expression]: 7056 func = this 7057 comments = func.comments if isinstance(func, exp.Expression) else None 7058 7059 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7060 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7061 if self._match_text_seq("WITHIN", "GROUP"): 7062 order = self._parse_wrapped(self._parse_order) 7063 this = self.expression(exp.WithinGroup, this=this, expression=order) 7064 7065 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7066 self._match(TokenType.WHERE) 7067 this = self.expression( 7068 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7069 ) 7070 self._match_r_paren() 7071 7072 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7073 # Some dialects choose to implement and some do not. 7074 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7075 7076 # There is some code above in _parse_lambda that handles 7077 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7078 7079 # The below changes handle 7080 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
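        # Illustrative sketch (not part of the original source), assuming the
        # public sqlglot.parse_one entry point: both placements should
        # normalize to the same tree, with the IgnoreNulls node wrapping the
        # aggregate inside the Window.
        #
        #     >>> import sqlglot
        #     >>> a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
        #     >>> b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
        #     >>> a == b  # expected to hold after the normalization below
        #     True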
7081 7082 # Oracle allows both formats 7083 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7084 # and Snowflake chose to do the same for familiarity 7085 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7086 if isinstance(this, exp.AggFunc): 7087 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7088 7089 if ignore_respect and ignore_respect is not this: 7090 ignore_respect.replace(ignore_respect.this) 7091 this = self.expression(ignore_respect.__class__, this=this) 7092 7093 this = self._parse_respect_or_ignore_nulls(this) 7094 7095 # bigquery select from window x AS (partition by ...) 7096 if alias: 7097 over = None 7098 self._match(TokenType.ALIAS) 7099 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7100 return this 7101 else: 7102 over = self._prev.text.upper() 7103 7104 if comments and isinstance(func, exp.Expression): 7105 func.pop_comments() 7106 7107 if not self._match(TokenType.L_PAREN): 7108 return self.expression( 7109 exp.Window, 7110 comments=comments, 7111 this=this, 7112 alias=self._parse_id_var(False), 7113 over=over, 7114 ) 7115 7116 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7117 7118 first = self._match(TokenType.FIRST) 7119 if self._match_text_seq("LAST"): 7120 first = False 7121 7122 partition, order = self._parse_partition_and_order() 7123 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7124 7125 if kind: 7126 self._match(TokenType.BETWEEN) 7127 start = self._parse_window_spec() 7128 7129 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7130 exclude = ( 7131 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7132 if self._match_text_seq("EXCLUDE") 7133 else None 7134 ) 7135 7136 spec = self.expression( 7137 exp.WindowSpec, 7138 kind=kind, 7139 start=start["value"], 7140 start_side=start["side"], 7141 end=end.get("value"), 7142 end_side=end.get("side"), 7143 exclude=exclude, 7144 ) 7145 else: 7146 spec = None 7147 7148 self._match_r_paren() 7149 7150 window = self.expression( 7151 exp.Window, 7152 comments=comments, 7153 this=this, 7154 partition_by=partition, 7155 order=order, 7156 spec=spec, 7157 alias=window_alias, 7158 over=over, 7159 first=first, 7160 ) 7161 7162 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
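        # Hypothetical Oracle-style input exercising this path (the identifiers
        # are made up for illustration):
        #
        #     MAX(salary) KEEP (DENSE_RANK FIRST ORDER BY hire_date) OVER (PARTITION BY dept)
        #
        # The KEEP (...) part is parsed as the window built above; the lookahead
        # below then sees OVER and recurses, so the OVER window wraps the KEEP window.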
7163 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7164 return self._parse_window(window, alias=alias) 7165 7166 return window 7167 7168 def _parse_partition_and_order( 7169 self, 7170 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7171 return self._parse_partition_by(), self._parse_order() 7172 7173 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7174 self._match(TokenType.BETWEEN) 7175 7176 return { 7177 "value": ( 7178 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7179 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7180 or self._parse_type() 7181 ), 7182 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7183 } 7184 7185 def _parse_alias( 7186 self, this: t.Optional[exp.Expression], explicit: bool = False 7187 ) -> t.Optional[exp.Expression]: 7188 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7189 # so this section tries to parse the clause version and if it fails, it treats the token 7190 # as an identifier (alias) 7191 if self._can_parse_limit_or_offset(): 7192 return this 7193 7194 any_token = self._match(TokenType.ALIAS) 7195 comments = self._prev_comments or [] 7196 7197 if explicit and not any_token: 7198 return this 7199 7200 if self._match(TokenType.L_PAREN): 7201 aliases = self.expression( 7202 exp.Aliases, 7203 comments=comments, 7204 this=this, 7205 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7206 ) 7207 self._match_r_paren(aliases) 7208 return aliases 7209 7210 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7211 self.STRING_ALIASES and self._parse_string_as_identifier() 7212 ) 7213 7214 if alias: 7215 comments.extend(alias.pop_comments()) 7216 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7217 column = this.this 7218 7219 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7220 if not this.comments and column and column.comments: 7221 this.comments = column.pop_comments() 7222 7223 return this 7224 7225 def _parse_id_var( 7226 self, 7227 any_token: bool = True, 7228 tokens: t.Optional[t.Collection[TokenType]] = None, 7229 ) -> t.Optional[exp.Expression]: 7230 expression = self._parse_identifier() 7231 if not expression and ( 7232 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7233 ): 7234 quoted = self._prev.token_type == TokenType.STRING 7235 expression = self._identifier_expression(quoted=quoted) 7236 7237 return expression 7238 7239 def _parse_string(self) -> t.Optional[exp.Expression]: 7240 if self._match_set(self.STRING_PARSERS): 7241 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7242 return self._parse_placeholder() 7243 7244 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7245 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7246 if output: 7247 output.update_positions(self._prev) 7248 return output 7249 7250 def _parse_number(self) -> t.Optional[exp.Expression]: 7251 if self._match_set(self.NUMERIC_PARSERS): 7252 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7253 return self._parse_placeholder() 7254 7255 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7256 if self._match(TokenType.IDENTIFIER): 7257 return self._identifier_expression(quoted=True) 7258 return self._parse_placeholder() 7259 7260 def _parse_var( 7261 self, 7262 any_token: bool = False, 7263 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7264 upper: bool = False, 7265 ) -> t.Optional[exp.Expression]: 7266 if ( 7267 (any_token and self._advance_any()) 7268 or self._match(TokenType.VAR) 7269 or (self._match_set(tokens) if tokens else False) 7270 ): 7271 return self.expression( 7272 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7273 ) 7274 return self._parse_placeholder() 7275 7276 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7277 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7278 self._advance() 7279 return self._prev 7280 return None 7281 7282 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7283 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7284 7285 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7286 return self._parse_primary() or self._parse_var(any_token=True) 7287 7288 def _parse_null(self) -> t.Optional[exp.Expression]: 7289 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7290 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7291 return self._parse_placeholder() 7292 7293 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7294 if self._match(TokenType.TRUE): 7295 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7296 if self._match(TokenType.FALSE): 7297 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7298 return self._parse_placeholder() 7299 7300 def _parse_star(self) -> t.Optional[exp.Expression]: 7301 if self._match(TokenType.STAR): 7302 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7303 return self._parse_placeholder() 7304 7305 def _parse_parameter(self) -> exp.Parameter: 7306 this = self._parse_identifier() or self._parse_primary_or_var() 7307 return self.expression(exp.Parameter, this=this) 7308 7309 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7310 if self._match_set(self.PLACEHOLDER_PARSERS): 7311 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7312 if placeholder: 7313 return placeholder 7314 self._advance(-1) 7315 return None 7316 7317 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7318 if not self._match_texts(keywords): 7319 return None 7320 if self._match(TokenType.L_PAREN, advance=False): 7321 return self._parse_wrapped_csv(self._parse_expression) 7322 7323 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7324 return [expression] if expression else None 7325 7326 def _parse_csv( 7327 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7328 ) -> t.List[exp.Expression]: 7329 parse_result = parse_method() 7330 items = [parse_result] if parse_result is not None else [] 7331 7332 while self._match(sep): 7333 self._add_comments(parse_result) 7334 parse_result = parse_method() 7335 if parse_result is not None: 7336 items.append(parse_result) 7337 7338 return items 7339 7340 def _parse_tokens( 7341 self, parse_method: t.Callable, expressions: t.Dict 7342 ) -> t.Optional[exp.Expression]: 7343 this = parse_method() 7344 7345 while self._match_set(expressions): 7346 this = self.expression( 7347 expressions[self._prev.token_type], 7348 this=this, 7349 comments=self._prev_comments, 7350 expression=parse_method(), 7351 ) 7352 7353 return this 7354 7355 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7356 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7357 7358 def 
_parse_wrapped_csv( 7359 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7360 ) -> t.List[exp.Expression]: 7361 return self._parse_wrapped( 7362 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7363 ) 7364 7365 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7366 wrapped = self._match(TokenType.L_PAREN) 7367 if not wrapped and not optional: 7368 self.raise_error("Expecting (") 7369 parse_result = parse_method() 7370 if wrapped: 7371 self._match_r_paren() 7372 return parse_result 7373 7374 def _parse_expressions(self) -> t.List[exp.Expression]: 7375 return self._parse_csv(self._parse_expression) 7376 7377 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7378 return ( 7379 self._parse_set_operations( 7380 self._parse_alias(self._parse_assignment(), explicit=True) 7381 if alias 7382 else self._parse_assignment() 7383 ) 7384 or self._parse_select() 7385 ) 7386 7387 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7388 return self._parse_query_modifiers( 7389 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7390 ) 7391 7392 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7393 this = None 7394 if self._match_texts(self.TRANSACTION_KIND): 7395 this = self._prev.text 7396 7397 self._match_texts(("TRANSACTION", "WORK")) 7398 7399 modes = [] 7400 while True: 7401 mode = [] 7402 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7403 mode.append(self._prev.text) 7404 7405 if mode: 7406 modes.append(" ".join(mode)) 7407 if not self._match(TokenType.COMMA): 7408 break 7409 7410 return self.expression(exp.Transaction, this=this, modes=modes) 7411 7412 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7413 chain = None 7414 savepoint = None 7415 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7416 7417 self._match_texts(("TRANSACTION", "WORK")) 7418 7419 if self._match_text_seq("TO"): 7420 self._match_text_seq("SAVEPOINT") 7421 savepoint = self._parse_id_var() 7422 7423 if self._match(TokenType.AND): 7424 chain = not self._match_text_seq("NO") 7425 self._match_text_seq("CHAIN") 7426 7427 if is_rollback: 7428 return self.expression(exp.Rollback, savepoint=savepoint) 7429 7430 return self.expression(exp.Commit, chain=chain) 7431 7432 def _parse_refresh(self) -> exp.Refresh: 7433 self._match(TokenType.TABLE) 7434 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7435 7436 def _parse_column_def_with_exists(self): 7437 start = self._index 7438 self._match(TokenType.COLUMN) 7439 7440 exists_column = self._parse_exists(not_=True) 7441 expression = self._parse_field_def() 7442 7443 if not isinstance(expression, exp.ColumnDef): 7444 self._retreat(start) 7445 return None 7446 7447 expression.set("exists", exists_column) 7448 7449 return expression 7450 7451 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7452 if not self._prev.text.upper() == "ADD": 7453 return None 7454 7455 expression = self._parse_column_def_with_exists() 7456 if not expression: 7457 return None 7458 7459 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7460 if self._match_texts(("FIRST", "AFTER")): 7461 position = self._prev.text 7462 column_position = self.expression( 7463 exp.ColumnPosition, this=self._parse_column(), position=position 7464 ) 7465 expression.set("position", column_position) 7466 7467 return 
expression 7468 7469 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7470 drop = self._match(TokenType.DROP) and self._parse_drop() 7471 if drop and not isinstance(drop, exp.Command): 7472 drop.set("kind", drop.args.get("kind", "COLUMN")) 7473 return drop 7474 7475 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7476 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7477 return self.expression( 7478 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7479 ) 7480 7481 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7482 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7483 self._match_text_seq("ADD") 7484 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7485 return self.expression( 7486 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7487 ) 7488 7489 column_def = self._parse_add_column() 7490 if isinstance(column_def, exp.ColumnDef): 7491 return column_def 7492 7493 exists = self._parse_exists(not_=True) 7494 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7495 return self.expression( 7496 exp.AddPartition, 7497 exists=exists, 7498 this=self._parse_field(any_token=True), 7499 location=self._match_text_seq("LOCATION", advance=False) 7500 and self._parse_property(), 7501 ) 7502 7503 return None 7504 7505 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7506 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7507 or self._match_text_seq("COLUMNS") 7508 ): 7509 schema = self._parse_schema() 7510 7511 return ( 7512 ensure_list(schema) 7513 if schema 7514 else self._parse_csv(self._parse_column_def_with_exists) 7515 ) 7516 7517 return self._parse_csv(_parse_add_alteration) 7518 7519 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7520 if self._match_texts(self.ALTER_ALTER_PARSERS): 7521 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7522 7523 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7524 # keyword after ALTER we default to parsing this statement 7525 self._match(TokenType.COLUMN) 7526 column = self._parse_field(any_token=True) 7527 7528 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7529 return self.expression(exp.AlterColumn, this=column, drop=True) 7530 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7531 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7532 if self._match(TokenType.COMMENT): 7533 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7534 if self._match_text_seq("DROP", "NOT", "NULL"): 7535 return self.expression( 7536 exp.AlterColumn, 7537 this=column, 7538 drop=True, 7539 allow_null=True, 7540 ) 7541 if self._match_text_seq("SET", "NOT", "NULL"): 7542 return self.expression( 7543 exp.AlterColumn, 7544 this=column, 7545 allow_null=False, 7546 ) 7547 7548 if self._match_text_seq("SET", "VISIBLE"): 7549 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7550 if self._match_text_seq("SET", "INVISIBLE"): 7551 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7552 7553 self._match_text_seq("SET", "DATA") 7554 self._match_text_seq("TYPE") 7555 return self.expression( 7556 exp.AlterColumn, 7557 this=column, 7558 dtype=self._parse_types(), 7559 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7560 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7561 ) 7562 7563 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7564 if self._match_texts(("ALL", "EVEN", "AUTO")): 7565 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7566 7567 self._match_text_seq("KEY", "DISTKEY") 7568 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7569 7570 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7571 if compound: 7572 self._match_text_seq("SORTKEY") 7573 7574 if self._match(TokenType.L_PAREN, advance=False): 7575 return self.expression( 7576 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7577 ) 7578 7579 self._match_texts(("AUTO", "NONE")) 7580 return self.expression( 7581 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7582 ) 7583 7584 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7585 index = self._index - 1 7586 7587 partition_exists = self._parse_exists() 7588 if self._match(TokenType.PARTITION, advance=False): 7589 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7590 7591 self._retreat(index) 7592 return self._parse_csv(self._parse_drop_column) 7593 7594 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7595 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7596 exists = self._parse_exists() 7597 old_column = self._parse_column() 7598 to = self._match_text_seq("TO") 7599 new_column = self._parse_column() 7600 7601 if old_column is None or to is None or new_column is None: 7602 return None 7603 7604 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7605 7606 self._match_text_seq("TO") 7607 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7608 7609 def _parse_alter_table_set(self) -> exp.AlterSet: 7610 alter_set = self.expression(exp.AlterSet) 7611 7612 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7613 "TABLE", "PROPERTIES" 7614 ): 7615 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7616 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7617 alter_set.set("expressions", [self._parse_assignment()]) 7618 elif self._match_texts(("LOGGED", "UNLOGGED")): 7619 alter_set.set("option", exp.var(self._prev.text.upper())) 7620 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7621 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7622 elif self._match_text_seq("LOCATION"): 7623 alter_set.set("location", self._parse_field()) 7624 elif self._match_text_seq("ACCESS", "METHOD"): 7625 alter_set.set("access_method", self._parse_field()) 7626 elif self._match_text_seq("TABLESPACE"): 7627 alter_set.set("tablespace", self._parse_field()) 7628 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7629 alter_set.set("file_format", [self._parse_field()]) 7630 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7631 alter_set.set("file_format", self._parse_wrapped_options()) 7632 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7633 alter_set.set("copy_options", self._parse_wrapped_options()) 7634 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7635 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7636 else: 7637 if self._match_text_seq("SERDE"): 7638 alter_set.set("serde", self._parse_field()) 7639 7640 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7641 alter_set.set("expressions", [properties]) 7642 7643 return alter_set 7644 7645 def _parse_alter_session(self) -> exp.AlterSession: 7646 """Parse ALTER SESSION SET/UNSET statements.""" 7647 if self._match(TokenType.SET): 7648 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7649 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7650 7651 self._match_text_seq("UNSET") 7652 expressions = self._parse_csv( 7653 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7654 ) 7655 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7656 7657 def _parse_alter(self) -> exp.Alter | exp.Command: 7658 start = self._prev 7659 7660 alter_token = self._match_set(self.ALTERABLES) and self._prev 7661 if not alter_token: 7662 return self._parse_as_command(start) 7663 7664 exists = self._parse_exists() 7665 only = self._match_text_seq("ONLY") 7666 7667 if alter_token.token_type == TokenType.SESSION: 7668 this = None 7669 check = None 7670 cluster = None 7671 else: 7672 this = self._parse_table(schema=True) 7673 check = self._match_text_seq("WITH", "CHECK") 7674 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7675 7676 if self._next: 7677 self._advance() 7678 7679 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7680 if parser: 7681 actions = ensure_list(parser(self)) 7682 not_valid = self._match_text_seq("NOT", "VALID") 7683 options = self._parse_csv(self._parse_property) 7684 7685 if not self._curr and actions: 7686 return self.expression( 7687 exp.Alter, 7688 this=this, 7689 kind=alter_token.text.upper(), 7690 exists=exists, 7691 actions=actions, 7692 only=only, 7693 options=options, 7694 cluster=cluster, 7695 not_valid=not_valid, 7696 check=check, 7697 ) 7698 7699 return self._parse_as_command(start) 7700 7701 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7702 start = self._prev 7703 # https://duckdb.org/docs/sql/statements/analyze 7704 if not self._curr: 7705 return self.expression(exp.Analyze) 7706 7707 options = [] 7708 while self._match_texts(self.ANALYZE_STYLES): 7709 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7710 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7711 else: 7712 options.append(self._prev.text.upper()) 7713 7714 this: t.Optional[exp.Expression] = None 7715 inner_expression: t.Optional[exp.Expression] = None 7716 7717 kind = self._curr and self._curr.text.upper() 7718 7719 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7720 this = self._parse_table_parts() 7721 elif self._match_text_seq("TABLES"): 7722 if self._match_set((TokenType.FROM, TokenType.IN)): 7723 kind = f"{kind} {self._prev.text.upper()}" 7724 this = self._parse_table(schema=True, is_db_reference=True) 7725 elif self._match_text_seq("DATABASE"): 7726 this = self._parse_table(schema=True, is_db_reference=True) 7727 elif self._match_text_seq("CLUSTER"): 7728 this = self._parse_table() 7729 # Try matching inner expr keywords before fallback to parse table. 
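        # A hedged sketch of the dispatch implemented here, assuming the public
        # sqlglot.parse_one entry point and Spark's ANALYZE TABLE grammar:
        #
        #     >>> import sqlglot
        #     >>> node = sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS NOSCAN", read="spark")
        #
        # This is expected to produce an exp.Analyze with kind="TABLE" whose
        # inner expression is the exp.AnalyzeStatistics built further below.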
7730 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7731 kind = None 7732 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7733 else: 7734 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7735 kind = None 7736 this = self._parse_table_parts() 7737 7738 partition = self._try_parse(self._parse_partition) 7739 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7740 return self._parse_as_command(start) 7741 7742 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7743 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7744 "WITH", "ASYNC", "MODE" 7745 ): 7746 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7747 else: 7748 mode = None 7749 7750 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7751 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7752 7753 properties = self._parse_properties() 7754 return self.expression( 7755 exp.Analyze, 7756 kind=kind, 7757 this=this, 7758 mode=mode, 7759 partition=partition, 7760 properties=properties, 7761 expression=inner_expression, 7762 options=options, 7763 ) 7764 7765 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7766 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7767 this = None 7768 kind = self._prev.text.upper() 7769 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7770 expressions = [] 7771 7772 if not self._match_text_seq("STATISTICS"): 7773 self.raise_error("Expecting token STATISTICS") 7774 7775 if self._match_text_seq("NOSCAN"): 7776 this = "NOSCAN" 7777 elif self._match(TokenType.FOR): 7778 if self._match_text_seq("ALL", "COLUMNS"): 7779 this = "FOR ALL COLUMNS" 7780 if self._match_texts("COLUMNS"): 7781 this = "FOR COLUMNS" 7782 expressions = self._parse_csv(self._parse_column_reference) 7783 elif self._match_text_seq("SAMPLE"): 7784 sample = self._parse_number() 7785 expressions = [ 7786 self.expression( 7787 exp.AnalyzeSample, 7788 sample=sample, 7789 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7790 ) 7791 ] 7792 7793 return self.expression( 7794 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7795 ) 7796 7797 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7798 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7799 kind = None 7800 this = None 7801 expression: t.Optional[exp.Expression] = None 7802 if self._match_text_seq("REF", "UPDATE"): 7803 kind = "REF" 7804 this = "UPDATE" 7805 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7806 this = "UPDATE SET DANGLING TO NULL" 7807 elif self._match_text_seq("STRUCTURE"): 7808 kind = "STRUCTURE" 7809 if self._match_text_seq("CASCADE", "FAST"): 7810 this = "CASCADE FAST" 7811 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7812 ("ONLINE", "OFFLINE") 7813 ): 7814 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7815 expression = self._parse_into() 7816 7817 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7818 7819 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7820 this = self._prev.text.upper() 7821 if self._match_text_seq("COLUMNS"): 7822 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7823 return None 7824 7825 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7826 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7827 if self._match_text_seq("STATISTICS"): 7828 return self.expression(exp.AnalyzeDelete, kind=kind) 7829 return None 7830 7831 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7832 if self._match_text_seq("CHAINED", "ROWS"): 7833 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7834 return None 7835 7836 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7837 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7838 this = self._prev.text.upper() 7839 expression: t.Optional[exp.Expression] = None 7840 expressions = [] 7841 update_options = None 7842 7843 if self._match_text_seq("HISTOGRAM", "ON"): 7844 expressions = self._parse_csv(self._parse_column_reference) 7845 with_expressions = [] 7846 while self._match(TokenType.WITH): 7847 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7848 if self._match_texts(("SYNC", "ASYNC")): 7849 if self._match_text_seq("MODE", advance=False): 7850 with_expressions.append(f"{self._prev.text.upper()} MODE") 7851 self._advance() 7852 else: 7853 buckets = self._parse_number() 7854 if self._match_text_seq("BUCKETS"): 7855 with_expressions.append(f"{buckets} BUCKETS") 7856 if with_expressions: 7857 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7858 7859 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7860 TokenType.UPDATE, advance=False 7861 ): 7862 update_options = self._prev.text.upper() 7863 self._advance() 7864 elif self._match_text_seq("USING", "DATA"): 7865 expression = self.expression(exp.UsingData, this=self._parse_string()) 7866 7867 return self.expression( 7868 exp.AnalyzeHistogram, 7869 this=this, 7870 expressions=expressions, 7871 expression=expression, 7872 update_options=update_options, 7873 ) 7874 7875 def _parse_merge(self) -> exp.Merge: 7876 self._match(TokenType.INTO) 7877 target = self._parse_table() 7878 7879 if target and self._match(TokenType.ALIAS, advance=False): 7880 target.set("alias", self._parse_table_alias()) 7881 7882 self._match(TokenType.USING) 7883 using = self._parse_table() 7884 7885 self._match(TokenType.ON) 7886 on = self._parse_assignment() 7887 7888 return self.expression( 7889 exp.Merge, 7890 this=target, 7891 using=using, 7892 on=on, 7893 whens=self._parse_when_matched(), 7894 returning=self._parse_returning(), 7895 ) 7896 7897 def _parse_when_matched(self) -> exp.Whens: 7898 whens = [] 7899 7900 while self._match(TokenType.WHEN): 7901 matched = not self._match(TokenType.NOT) 7902 self._match_text_seq("MATCHED") 7903 source = ( 7904 False 7905 if self._match_text_seq("BY", "TARGET") 7906 else self._match_text_seq("BY", "SOURCE") 7907 ) 7908 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7909 7910 self._match(TokenType.THEN) 7911 7912 if self._match(TokenType.INSERT): 7913 this = self._parse_star() 7914 if this: 7915 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7916 else: 7917 then = self.expression( 7918 exp.Insert, 7919 this=exp.var("ROW") 7920 if self._match_text_seq("ROW") 7921 else self._parse_value(values=False), 7922 expression=self._match_text_seq("VALUES") and self._parse_value(), 7923 ) 7924 elif self._match(TokenType.UPDATE): 7925 expressions = self._parse_star() 7926 if expressions: 7927 then = self.expression(exp.Update, expressions=expressions) 7928 else: 7929 then = self.expression( 7930 exp.Update, 7931 
expressions=self._match(TokenType.SET) 7932 and self._parse_csv(self._parse_equality), 7933 ) 7934 elif self._match(TokenType.DELETE): 7935 then = self.expression(exp.Var, this=self._prev.text) 7936 else: 7937 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7938 7939 whens.append( 7940 self.expression( 7941 exp.When, 7942 matched=matched, 7943 source=source, 7944 condition=condition, 7945 then=then, 7946 ) 7947 ) 7948 return self.expression(exp.Whens, expressions=whens) 7949 7950 def _parse_show(self) -> t.Optional[exp.Expression]: 7951 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7952 if parser: 7953 return parser(self) 7954 return self._parse_as_command(self._prev) 7955 7956 def _parse_set_item_assignment( 7957 self, kind: t.Optional[str] = None 7958 ) -> t.Optional[exp.Expression]: 7959 index = self._index 7960 7961 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7962 return self._parse_set_transaction(global_=kind == "GLOBAL") 7963 7964 left = self._parse_primary() or self._parse_column() 7965 assignment_delimiter = self._match_texts(("=", "TO")) 7966 7967 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7968 self._retreat(index) 7969 return None 7970 7971 right = self._parse_statement() or self._parse_id_var() 7972 if isinstance(right, (exp.Column, exp.Identifier)): 7973 right = exp.var(right.name) 7974 7975 this = self.expression(exp.EQ, this=left, expression=right) 7976 return self.expression(exp.SetItem, this=this, kind=kind) 7977 7978 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7979 self._match_text_seq("TRANSACTION") 7980 characteristics = self._parse_csv( 7981 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7982 ) 7983 return self.expression( 7984 exp.SetItem, 7985 expressions=characteristics, 7986 kind="TRANSACTION", 7987 **{"global": global_}, # type: ignore 7988 ) 7989 7990 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7991 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7992 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7993 7994 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7995 index = self._index 7996 set_ = self.expression( 7997 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7998 ) 7999 8000 if self._curr: 8001 self._retreat(index) 8002 return self._parse_as_command(self._prev) 8003 8004 return set_ 8005 8006 def _parse_var_from_options( 8007 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8008 ) -> t.Optional[exp.Var]: 8009 start = self._curr 8010 if not start: 8011 return None 8012 8013 option = start.text.upper() 8014 continuations = options.get(option) 8015 8016 index = self._index 8017 self._advance() 8018 for keywords in continuations or []: 8019 if isinstance(keywords, str): 8020 keywords = (keywords,) 8021 8022 if self._match_text_seq(*keywords): 8023 option = f"{option} {' '.join(keywords)}" 8024 break 8025 else: 8026 if continuations or continuations is None: 8027 if raise_unmatched: 8028 self.raise_error(f"Unknown option {option}") 8029 8030 self._retreat(index) 8031 return None 8032 8033 return exp.var(option) 8034 8035 def _parse_as_command(self, start: Token) -> exp.Command: 8036 while self._curr: 8037 self._advance() 8038 text = self._find_sql(start, self._prev) 8039 size = len(start.text) 8040 self._warn_unsupported() 8041 return exp.Command(this=text[:size], 
expression=text[size:]) 8042 8043 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8044 settings = [] 8045 8046 self._match_l_paren() 8047 kind = self._parse_id_var() 8048 8049 if self._match(TokenType.L_PAREN): 8050 while True: 8051 key = self._parse_id_var() 8052 value = self._parse_primary() 8053 if not key and value is None: 8054 break 8055 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8056 self._match(TokenType.R_PAREN) 8057 8058 self._match_r_paren() 8059 8060 return self.expression( 8061 exp.DictProperty, 8062 this=this, 8063 kind=kind.this if kind else None, 8064 settings=settings, 8065 ) 8066 8067 def _parse_dict_range(self, this: str) -> exp.DictRange: 8068 self._match_l_paren() 8069 has_min = self._match_text_seq("MIN") 8070 if has_min: 8071 min = self._parse_var() or self._parse_primary() 8072 self._match_text_seq("MAX") 8073 max = self._parse_var() or self._parse_primary() 8074 else: 8075 max = self._parse_var() or self._parse_primary() 8076 min = exp.Literal.number(0) 8077 self._match_r_paren() 8078 return self.expression(exp.DictRange, this=this, min=min, max=max) 8079 8080 def _parse_comprehension( 8081 self, this: t.Optional[exp.Expression] 8082 ) -> t.Optional[exp.Comprehension]: 8083 index = self._index 8084 expression = self._parse_column() 8085 if not self._match(TokenType.IN): 8086 self._retreat(index - 1) 8087 return None 8088 iterator = self._parse_column() 8089 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8090 return self.expression( 8091 exp.Comprehension, 8092 this=this, 8093 expression=expression, 8094 iterator=iterator, 8095 condition=condition, 8096 ) 8097 8098 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8099 if self._match(TokenType.HEREDOC_STRING): 8100 return self.expression(exp.Heredoc, this=self._prev.text) 8101 8102 if not self._match_text_seq("$"): 8103 return None 8104 8105 tags = ["$"] 8106 tag_text = None 8107 8108 if self._is_connected(): 8109 self._advance() 8110 tags.append(self._prev.text.upper()) 8111 else: 8112 self.raise_error("No closing $ found") 8113 8114 if tags[-1] != "$": 8115 if self._is_connected() and self._match_text_seq("$"): 8116 tag_text = tags[-1] 8117 tags.append("$") 8118 else: 8119 self.raise_error("No closing $ found") 8120 8121 heredoc_start = self._curr 8122 8123 while self._curr: 8124 if self._match_text_seq(*tags, advance=False): 8125 this = self._find_sql(heredoc_start, self._prev) 8126 self._advance(len(tags)) 8127 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8128 8129 self._advance() 8130 8131 self.raise_error(f"No closing {''.join(tags)} found") 8132 return None 8133 8134 def _find_parser( 8135 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8136 ) -> t.Optional[t.Callable]: 8137 if not self._curr: 8138 return None 8139 8140 index = self._index 8141 this = [] 8142 while True: 8143 # The current token might be multiple words 8144 curr = self._curr.text.upper() 8145 key = curr.split(" ") 8146 this.append(curr) 8147 8148 self._advance() 8149 result, trie = in_trie(trie, key) 8150 if result == TrieResult.FAILED: 8151 break 8152 8153 if result == TrieResult.EXISTS: 8154 subparser = parsers[" ".join(this)] 8155 return subparser 8156 8157 self._retreat(index) 8158 return None 8159 8160 def _match(self, token_type, advance=True, expression=None): 8161 if not self._curr: 8162 return None 8163 8164 if self._curr.token_type == token_type: 8165 if advance: 8166 self._advance() 8167 self._add_comments(expression) 8168 return 
True 8169 8170 return None 8171 8172 def _match_set(self, types, advance=True): 8173 if not self._curr: 8174 return None 8175 8176 if self._curr.token_type in types: 8177 if advance: 8178 self._advance() 8179 return True 8180 8181 return None 8182 8183 def _match_pair(self, token_type_a, token_type_b, advance=True): 8184 if not self._curr or not self._next: 8185 return None 8186 8187 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8188 if advance: 8189 self._advance(2) 8190 return True 8191 8192 return None 8193 8194 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8195 if not self._match(TokenType.L_PAREN, expression=expression): 8196 self.raise_error("Expecting (") 8197 8198 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8199 if not self._match(TokenType.R_PAREN, expression=expression): 8200 self.raise_error("Expecting )") 8201 8202 def _match_texts(self, texts, advance=True): 8203 if ( 8204 self._curr 8205 and self._curr.token_type != TokenType.STRING 8206 and self._curr.text.upper() in texts 8207 ): 8208 if advance: 8209 self._advance() 8210 return True 8211 return None 8212 8213 def _match_text_seq(self, *texts, advance=True): 8214 index = self._index 8215 for text in texts: 8216 if ( 8217 self._curr 8218 and self._curr.token_type != TokenType.STRING 8219 and self._curr.text.upper() == text 8220 ): 8221 self._advance() 8222 else: 8223 self._retreat(index) 8224 return None 8225 8226 if not advance: 8227 self._retreat(index) 8228 8229 return True 8230 8231 def _replace_lambda( 8232 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8233 ) -> t.Optional[exp.Expression]: 8234 if not node: 8235 return node 8236 8237 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8238 8239 for column in node.find_all(exp.Column): 8240 typ = lambda_types.get(column.parts[0].name) 8241 if typ is not None: 8242 dot_or_id = column.to_dot() if column.table else column.this 8243 8244 if typ: 8245 dot_or_id = self.expression( 8246 exp.Cast, 8247 this=dot_or_id, 8248 to=typ, 8249 ) 8250 8251 parent = column.parent 8252 8253 while isinstance(parent, exp.Dot): 8254 if not isinstance(parent.parent, exp.Dot): 8255 parent.replace(dot_or_id) 8256 break 8257 parent = parent.parent 8258 else: 8259 if column is node: 8260 node = dot_or_id 8261 else: 8262 column.replace(dot_or_id) 8263 return node 8264 8265 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8266 start = self._prev 8267 8268 # Not to be confused with TRUNCATE(number, decimals) function call 8269 if self._match(TokenType.L_PAREN): 8270 self._retreat(self._index - 2) 8271 return self._parse_function() 8272 8273 # Clickhouse supports TRUNCATE DATABASE as well 8274 is_database = self._match(TokenType.DATABASE) 8275 8276 self._match(TokenType.TABLE) 8277 8278 exists = self._parse_exists(not_=False) 8279 8280 expressions = self._parse_csv( 8281 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8282 ) 8283 8284 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8285 8286 if self._match_text_seq("RESTART", "IDENTITY"): 8287 identity = "RESTART" 8288 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8289 identity = "CONTINUE" 8290 else: 8291 identity = None 8292 8293 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8294 option = self._prev.text 8295 else: 8296 option = None 8297 8298 partition = self._parse_partition() 
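        # Illustrative note (an assumption, not from the original source): a
        # Postgres-style statement exercising the branches above, e.g.
        #
        #     TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE
        #
        # should parse into exp.TruncateTable with two table expressions,
        # identity="RESTART" and option="CASCADE".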
8299 8300 # Fallback case 8301 if self._curr: 8302 return self._parse_as_command(start) 8303 8304 return self.expression( 8305 exp.TruncateTable, 8306 expressions=expressions, 8307 is_database=is_database, 8308 exists=exists, 8309 cluster=cluster, 8310 identity=identity, 8311 option=option, 8312 partition=partition, 8313 ) 8314 8315 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8316 this = self._parse_ordered(self._parse_opclass) 8317 8318 if not self._match(TokenType.WITH): 8319 return this 8320 8321 op = self._parse_var(any_token=True) 8322 8323 return self.expression(exp.WithOperator, this=this, op=op) 8324 8325 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8326 self._match(TokenType.EQ) 8327 self._match(TokenType.L_PAREN) 8328 8329 opts: t.List[t.Optional[exp.Expression]] = [] 8330 option: exp.Expression | None 8331 while self._curr and not self._match(TokenType.R_PAREN): 8332 if self._match_text_seq("FORMAT_NAME", "="): 8333 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8334 option = self._parse_format_name() 8335 else: 8336 option = self._parse_property() 8337 8338 if option is None: 8339 self.raise_error("Unable to parse option") 8340 break 8341 8342 opts.append(option) 8343 8344 return opts 8345 8346 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8347 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8348 8349 options = [] 8350 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8351 option = self._parse_var(any_token=True) 8352 prev = self._prev.text.upper() 8353 8354 # Different dialects might separate options and values by white space, "=" and "AS" 8355 self._match(TokenType.EQ) 8356 self._match(TokenType.ALIAS) 8357 8358 param = self.expression(exp.CopyParameter, this=option) 8359 8360 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8361 TokenType.L_PAREN, advance=False 8362 ): 8363 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8364 param.set("expressions", self._parse_wrapped_options()) 8365 elif prev == "FILE_FORMAT": 8366 # T-SQL's external file format case 8367 param.set("expression", self._parse_field()) 8368 else: 8369 param.set("expression", self._parse_unquoted_field()) 8370 8371 options.append(param) 8372 self._match(sep) 8373 8374 return options 8375 8376 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8377 expr = self.expression(exp.Credentials) 8378 8379 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8380 expr.set("storage", self._parse_field()) 8381 if self._match_text_seq("CREDENTIALS"): 8382 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8383 creds = ( 8384 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8385 ) 8386 expr.set("credentials", creds) 8387 if self._match_text_seq("ENCRYPTION"): 8388 expr.set("encryption", self._parse_wrapped_options()) 8389 if self._match_text_seq("IAM_ROLE"): 8390 expr.set("iam_role", self._parse_field()) 8391 if self._match_text_seq("REGION"): 8392 expr.set("region", self._parse_field()) 8393 8394 return expr 8395 8396 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8397 return self._parse_field() 8398 8399 def _parse_copy(self) -> exp.Copy | exp.Command: 8400 start = self._prev 8401 8402 self._match(TokenType.INTO) 8403 8404 this = ( 8405 self._parse_select(nested=True, parse_subquery_alias=False) 8406 if self._match(TokenType.L_PAREN, advance=False) 8407 else self._parse_table(schema=True) 
8408 ) 8409 8410 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8411 8412 files = self._parse_csv(self._parse_file_location) 8413 if self._match(TokenType.EQ, advance=False): 8414 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8415 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8416 # list via `_parse_wrapped(..)` below. 8417 self._advance(-1) 8418 files = [] 8419 8420 credentials = self._parse_credentials() 8421 8422 self._match_text_seq("WITH") 8423 8424 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8425 8426 # Fallback case 8427 if self._curr: 8428 return self._parse_as_command(start) 8429 8430 return self.expression( 8431 exp.Copy, 8432 this=this, 8433 kind=kind, 8434 credentials=credentials, 8435 files=files, 8436 params=params, 8437 ) 8438 8439 def _parse_normalize(self) -> exp.Normalize: 8440 return self.expression( 8441 exp.Normalize, 8442 this=self._parse_bitwise(), 8443 form=self._match(TokenType.COMMA) and self._parse_var(), 8444 ) 8445 8446 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8447 args = self._parse_csv(lambda: self._parse_lambda()) 8448 8449 this = seq_get(args, 0) 8450 decimals = seq_get(args, 1) 8451 8452 return expr_type( 8453 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8454 ) 8455 8456 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8457 star_token = self._prev 8458 8459 if self._match_text_seq("COLUMNS", "(", advance=False): 8460 this = self._parse_function() 8461 if isinstance(this, exp.Columns): 8462 this.set("unpack", True) 8463 return this 8464 8465 return self.expression( 8466 exp.Star, 8467 **{ # type: ignore 8468 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8469 "replace": self._parse_star_op("REPLACE"), 8470 "rename": self._parse_star_op("RENAME"), 8471 }, 8472 ).update_positions(star_token) 8473 8474 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8475 privilege_parts = [] 8476 8477 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8478 # (end of privilege list) or L_PAREN (start of column list) are met 8479 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8480 privilege_parts.append(self._curr.text.upper()) 8481 self._advance() 8482 8483 this = exp.var(" ".join(privilege_parts)) 8484 expressions = ( 8485 self._parse_wrapped_csv(self._parse_column) 8486 if self._match(TokenType.L_PAREN, advance=False) 8487 else None 8488 ) 8489 8490 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8491 8492 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8493 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8494 principal = self._parse_id_var() 8495 8496 if not principal: 8497 return None 8498 8499 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8500 8501 def _parse_grant_revoke_common( 8502 self, 8503 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8504 privileges = self._parse_csv(self._parse_grant_privilege) 8505 8506 self._match(TokenType.ON) 8507 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8508 8509 # Attempt to parse the securable e.g. 
MySQL allows names 8510 # such as "foo.*", "*.*" which are not easily parseable yet 8511 securable = self._try_parse(self._parse_table_parts) 8512 8513 return privileges, kind, securable 8514 8515 def _parse_grant(self) -> exp.Grant | exp.Command: 8516 start = self._prev 8517 8518 privileges, kind, securable = self._parse_grant_revoke_common() 8519 8520 if not securable or not self._match_text_seq("TO"): 8521 return self._parse_as_command(start) 8522 8523 principals = self._parse_csv(self._parse_grant_principal) 8524 8525 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8526 8527 if self._curr: 8528 return self._parse_as_command(start) 8529 8530 return self.expression( 8531 exp.Grant, 8532 privileges=privileges, 8533 kind=kind, 8534 securable=securable, 8535 principals=principals, 8536 grant_option=grant_option, 8537 ) 8538 8539 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8540 start = self._prev 8541 8542 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8543 8544 privileges, kind, securable = self._parse_grant_revoke_common() 8545 8546 if not securable or not self._match_text_seq("FROM"): 8547 return self._parse_as_command(start) 8548 8549 principals = self._parse_csv(self._parse_grant_principal) 8550 8551 cascade = None 8552 if self._match_texts(("CASCADE", "RESTRICT")): 8553 cascade = self._prev.text.upper() 8554 8555 if self._curr: 8556 return self._parse_as_command(start) 8557 8558 return self.expression( 8559 exp.Revoke, 8560 privileges=privileges, 8561 kind=kind, 8562 securable=securable, 8563 principals=principals, 8564 grant_option=grant_option, 8565 cascade=cascade, 8566 ) 8567 8568 def _parse_overlay(self) -> exp.Overlay: 8569 return self.expression( 8570 exp.Overlay, 8571 **{ # type: ignore 8572 "this": self._parse_bitwise(), 8573 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8574 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8575 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8576 }, 8577 ) 8578 8579 def _parse_format_name(self) -> exp.Property: 8580 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8581 # for FILE_FORMAT = <format_name> 8582 return self.expression( 8583 exp.Property, 8584 this=exp.var("FORMAT_NAME"), 8585 value=self._parse_string() or self._parse_table_parts(), 8586 ) 8587 8588 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8589 args: t.List[exp.Expression] = [] 8590 8591 if self._match(TokenType.DISTINCT): 8592 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8593 self._match(TokenType.COMMA) 8594 8595 args.extend(self._parse_csv(self._parse_assignment)) 8596 8597 return self.expression( 8598 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8599 ) 8600 8601 def _identifier_expression( 8602 self, token: t.Optional[Token] = None, **kwargs: t.Any 8603 ) -> exp.Identifier: 8604 token = token or self._prev 8605 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8606 expression.update_positions(token) 8607 return expression 8608 8609 def _build_pipe_cte( 8610 self, 8611 query: exp.Query, 8612 expressions: t.List[exp.Expression], 8613 alias_cte: t.Optional[exp.TableAlias] = None, 8614 ) -> exp.Select: 8615 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8616 if alias_cte: 8617 new_cte = alias_cte 8618 else: 8619 self._pipe_cte_counter += 1 8620 new_cte = f"__tmp{self._pipe_cte_counter}" 8621 8622 with_ = 
query.args.get("with") 8623 ctes = with_.pop() if with_ else None 8624 8625 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8626 if ctes: 8627 new_select.set("with", ctes) 8628 8629 return new_select.with_(new_cte, as_=query, copy=False) 8630 8631 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8632 select = self._parse_select(consume_pipe=False) 8633 if not select: 8634 return query 8635 8636 return self._build_pipe_cte( 8637 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8638 ) 8639 8640 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8641 limit = self._parse_limit() 8642 offset = self._parse_offset() 8643 if limit: 8644 curr_limit = query.args.get("limit", limit) 8645 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8646 query.limit(limit, copy=False) 8647 if offset: 8648 curr_offset = query.args.get("offset") 8649 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8650 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8651 8652 return query 8653 8654 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8655 this = self._parse_assignment() 8656 if self._match_text_seq("GROUP", "AND", advance=False): 8657 return this 8658 8659 this = self._parse_alias(this) 8660 8661 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8662 return self._parse_ordered(lambda: this) 8663 8664 return this 8665 8666 def _parse_pipe_syntax_aggregate_group_order_by( 8667 self, query: exp.Select, group_by_exists: bool = True 8668 ) -> exp.Select: 8669 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8670 aggregates_or_groups, orders = [], [] 8671 for element in expr: 8672 if isinstance(element, exp.Ordered): 8673 this = element.this 8674 if isinstance(this, exp.Alias): 8675 element.set("this", this.args["alias"]) 8676 orders.append(element) 8677 else: 8678 this = element 8679 aggregates_or_groups.append(this) 8680 8681 if group_by_exists: 8682 query.select(*aggregates_or_groups, copy=False).group_by( 8683 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8684 copy=False, 8685 ) 8686 else: 8687 query.select(*aggregates_or_groups, append=False, copy=False) 8688 8689 if orders: 8690 return query.order_by(*orders, append=False, copy=False) 8691 8692 return query 8693 8694 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8695 self._match_text_seq("AGGREGATE") 8696 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8697 8698 if self._match(TokenType.GROUP_BY) or ( 8699 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8700 ): 8701 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8702 8703 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8704 8705 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8706 first_setop = self.parse_set_operation(this=query) 8707 if not first_setop: 8708 return None 8709 8710 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8711 expr = self._parse_paren() 8712 return expr.assert_is(exp.Subquery).unnest() if expr else None 8713 8714 first_setop.this.pop() 8715 8716 setops = [ 8717 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8718 *self._parse_csv(_parse_and_unwrap_query), 8719 ] 8720 8721 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8722 
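# Illustrative note (not from the sqlglot source): union/except_/intersect below
# wrap `query` in a new set-operation node, so the WITH clause that
# _build_pipe_cte just attached is popped off first and re-attached to the outer
# node afterwards, letting the CTEs scope over every operand.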
with_ = query.args.get("with") 8723 ctes = with_.pop() if with_ else None 8724 8725 if isinstance(first_setop, exp.Union): 8726 query = query.union(*setops, copy=False, **first_setop.args) 8727 elif isinstance(first_setop, exp.Except): 8728 query = query.except_(*setops, copy=False, **first_setop.args) 8729 else: 8730 query = query.intersect(*setops, copy=False, **first_setop.args) 8731 8732 query.set("with", ctes) 8733 8734 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8735 8736 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8737 join = self._parse_join() 8738 if not join: 8739 return None 8740 8741 if isinstance(query, exp.Select): 8742 return query.join(join, copy=False) 8743 8744 return query 8745 8746 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8747 pivots = self._parse_pivots() 8748 if not pivots: 8749 return query 8750 8751 from_ = query.args.get("from") 8752 if from_: 8753 from_.this.set("pivots", pivots) 8754 8755 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8756 8757 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8758 self._match_text_seq("EXTEND") 8759 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8760 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8761 8762 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8763 sample = self._parse_table_sample() 8764 8765 with_ = query.args.get("with") 8766 if with_: 8767 with_.expressions[-1].this.set("sample", sample) 8768 else: 8769 query.set("sample", sample) 8770 8771 return query 8772 8773 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8774 if isinstance(query, exp.Subquery): 8775 query = exp.select("*").from_(query, copy=False) 8776 8777 if not query.args.get("from"): 8778 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8779 8780 while self._match(TokenType.PIPE_GT): 8781 start = self._curr 8782 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8783 if not parser: 8784 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8785 # keywords, making it tricky to disambiguate them without lookahead. The approach 8786 # here is to try and parse a set operation and if that fails, then try to parse a 8787 # join operator. If that fails as well, then the operator is not supported. 
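# Illustrative note (not from the sqlglot source): in BigQuery-style pipe syntax,
# both `|> FULL UNION ALL BY NAME (SELECT ...)` and `|> FULL JOIN t USING (id)`
# start with the keyword FULL, so the first token alone cannot route the parse.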
8788                 parsed_query = self._parse_pipe_syntax_set_operator(query)
8789                 parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
8790                 if not parsed_query:
8791                     self._retreat(start)
8792                     self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
8793                     break
8794                 query = parsed_query
8795             else:
8796                 query = parser(self, query)
8797
8798         return query
8799
8800     def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
8801         vars = self._parse_csv(self._parse_id_var)
8802         if not vars:
8803             return None
8804
8805         return self.expression(
8806             exp.DeclareItem,
8807             this=vars,
8808             kind=self._parse_types(),
8809             default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
8810         )
8811
8812     def _parse_declare(self) -> exp.Declare | exp.Command:
8813         start = self._prev
8814         expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))
8815
8816         if not expressions or self._curr:
8817             return self._parse_as_command(start)
8818
8819         return self.expression(exp.Declare, expressions=expressions)
8820
8821     def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
8822         exp_class = exp.Cast if strict else exp.TryCast
8823
8824         if exp_class == exp.TryCast:
8825             kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING
8826
8827         return self.expression(exp_class, **kwargs)
8828
8829     def _parse_json_value(self) -> exp.JSONValue:
8830         this = self._parse_bitwise()
8831         self._match(TokenType.COMMA)
8832         path = self._parse_bitwise()
8833
8834         returning = self._match(TokenType.RETURNING) and self._parse_type()
8835
8836         return self.expression(
8837             exp.JSONValue,
8838             this=this,
8839             path=self.dialect.to_json_path(path),
8840             returning=returning,
8841             on_condition=self._parse_on_condition(),
8842         )
8843
8844     def _parse_group_concat(self) -> t.Optional[exp.Expression]:
8845         def concat_exprs(
8846             node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
8847         ) -> exp.Expression:
8848             if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
8849                 concat_exprs = [
8850                     self.expression(exp.Concat, expressions=node.expressions, safe=True)
8851                 ]
8852                 node.set("expressions", concat_exprs)
8853                 return node
8854             if len(exprs) == 1:
8855                 return exprs[0]
8856             return self.expression(exp.Concat, expressions=exprs, safe=True)
8857
8858         args = self._parse_csv(self._parse_lambda)
8859
8860         if args:
8861             order = args[-1] if isinstance(args[-1], exp.Order) else None
8862
8863             if order:
8864                 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
8865                 # remove 'expr' from exp.Order and add it back to args
8866                 args[-1] = order.this
8867                 order.set("this", concat_exprs(order.this, args))
8868
8869             this = order or concat_exprs(args[0], args)
8870         else:
8871             this = None
8872
8873         separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None
8874
8875         return self.expression(exp.GroupConcat, this=this, separator=separator)
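For orientation, a minimal usage sketch (it assumes the public sqlglot API and is not part of the module source): a Dialect wires a Tokenizer and this Parser together, and Parser.parse returns one expression tree per statement.

from sqlglot.dialects import Dialect

# Pick a dialect; its tokenizer and parser classes are preconfigured for it.
dialect = Dialect.get_or_raise("duckdb")

sql = "SELECT CEIL(price, 2) FROM items"
tokens = dialect.tokenize(sql)

# One syntax tree per parsed statement; here a single exp.Select.
expressions = dialect.parser().parse(tokens, sql)
print(expressions[0].sql())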
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
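# Illustrative note (not from the sqlglot source): the wrapping matters because
# % shares precedence with * and /, so rendering MOD(a + 1, 7) without the Paren
# nodes would produce a + 1 % 7, which binds as a + (1 % 7).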
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
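# Illustrative note (not from the sqlglot source): for a dialect in which
# ARRAY(1, 2) and [1, 2] are distinct constructors, bracket_notation records
# which spelling was parsed so the generator can round-trip it; other dialects
# leave the flag unset.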
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
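# Illustrative note (not from the sqlglot source): with default_source_tz="UTC",
# the two-argument call CONVERT_TIMEZONE('America/New_York', ts) becomes
# ConvertTimezone(source_tz='UTC', target_tz='America/New_York', timestamp=ts);
# a three-argument call falls through to exp.ConvertTimezone.from_arg_list.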
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring(
260             this=exp.Cast(
261                 this=seq_get(args, 0),
262                 to=exp.DataType(this=exp.DataType.Type.TEXT),
263             ),
264             start=exp.Literal.number(1),
265             length=exp.Literal.number(10),
266         ),
267         "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
268         "UPPER": build_upper,
269         "VAR_MAP": build_var_map,
270     }
271
272     NO_PAREN_FUNCTIONS = {
273         TokenType.CURRENT_DATE: exp.CurrentDate,
274         TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
275         TokenType.CURRENT_TIME: exp.CurrentTime,
276         TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
277         TokenType.CURRENT_USER: exp.CurrentUser,
278     }
279
280     STRUCT_TYPE_TOKENS = {
281         TokenType.NESTED,
282         TokenType.OBJECT,
283         TokenType.STRUCT,
284         TokenType.UNION,
285     }
286
287     NESTED_TYPE_TOKENS = {
288         TokenType.ARRAY,
289         TokenType.LIST,
290         TokenType.LOWCARDINALITY,
291         TokenType.MAP,
292         TokenType.NULLABLE,
293         TokenType.RANGE,
294         *STRUCT_TYPE_TOKENS,
295     }
296
297     ENUM_TYPE_TOKENS = {
298         TokenType.DYNAMIC,
299         TokenType.ENUM,
300         TokenType.ENUM8,
301         TokenType.ENUM16,
302     }
303
304     AGGREGATE_TYPE_TOKENS = {
305         TokenType.AGGREGATEFUNCTION,
306         TokenType.SIMPLEAGGREGATEFUNCTION,
307     }
308
309     TYPE_TOKENS = {
310         TokenType.BIT,
311         TokenType.BOOLEAN,
312         TokenType.TINYINT,
313         TokenType.UTINYINT,
314         TokenType.SMALLINT,
315         TokenType.USMALLINT,
316         TokenType.INT,
317         TokenType.UINT,
318         TokenType.BIGINT,
319         TokenType.UBIGINT,
320         TokenType.INT128,
321         TokenType.UINT128,
322         TokenType.INT256,
323         TokenType.UINT256,
324         TokenType.MEDIUMINT,
325         TokenType.UMEDIUMINT,
326         TokenType.FIXEDSTRING,
327         TokenType.FLOAT,
328         TokenType.DOUBLE,
329         TokenType.UDOUBLE,
330         TokenType.CHAR,
331         TokenType.NCHAR,
332         TokenType.VARCHAR,
333         TokenType.NVARCHAR,
334         TokenType.BPCHAR,
335         TokenType.TEXT,
336         TokenType.MEDIUMTEXT,
337         TokenType.LONGTEXT,
338         TokenType.BLOB,
339         TokenType.MEDIUMBLOB,
340         TokenType.LONGBLOB,
341         TokenType.BINARY,
342         TokenType.VARBINARY,
343         TokenType.JSON,
344         TokenType.JSONB,
345         TokenType.INTERVAL,
346         TokenType.TINYBLOB,
347         TokenType.TINYTEXT,
348         TokenType.TIME,
349         TokenType.TIMETZ,
350         TokenType.TIMESTAMP,
351         TokenType.TIMESTAMP_S,
352         TokenType.TIMESTAMP_MS,
353         TokenType.TIMESTAMP_NS,
354         TokenType.TIMESTAMPTZ,
355         TokenType.TIMESTAMPLTZ,
356         TokenType.TIMESTAMPNTZ,
357         TokenType.DATETIME,
358         TokenType.DATETIME2,
359         TokenType.DATETIME64,
360         TokenType.SMALLDATETIME,
361         TokenType.DATE,
362         TokenType.DATE32,
363         TokenType.INT4RANGE,
364         TokenType.INT4MULTIRANGE,
365         TokenType.INT8RANGE,
366         TokenType.INT8MULTIRANGE,
367         TokenType.NUMRANGE,
368         TokenType.NUMMULTIRANGE,
369         TokenType.TSRANGE,
370         TokenType.TSMULTIRANGE,
371         TokenType.TSTZRANGE,
372         TokenType.TSTZMULTIRANGE,
373         TokenType.DATERANGE,
374         TokenType.DATEMULTIRANGE,
375         TokenType.DECIMAL,
376         TokenType.DECIMAL32,
377         TokenType.DECIMAL64,
378         TokenType.DECIMAL128,
379         TokenType.DECIMAL256,
380         TokenType.UDECIMAL,
381         TokenType.BIGDECIMAL,
382         TokenType.UUID,
383         TokenType.GEOGRAPHY,
384         TokenType.GEOGRAPHYPOINT,
385         TokenType.GEOMETRY,
386         TokenType.POINT,
387         TokenType.RING,
388         TokenType.LINESTRING,
389         TokenType.MULTILINESTRING,
390         TokenType.POLYGON,
391         TokenType.MULTIPOLYGON,
392         TokenType.HLLSKETCH,
393         TokenType.HSTORE,
394         TokenType.PSEUDO_TYPE,
395         TokenType.SUPER,
396         TokenType.SERIAL,
397         TokenType.SMALLSERIAL,
398         TokenType.BIGSERIAL,
399         TokenType.XML,
400         TokenType.YEAR,
401         TokenType.USERDEFINED,
402         TokenType.MONEY,
403         TokenType.SMALLMONEY,
404         TokenType.ROWVERSION,
405         TokenType.IMAGE,
406         TokenType.VARIANT,
407
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.MERGE, 536 TokenType.NATURAL, 537 TokenType.NEXT, 538 TokenType.OFFSET, 539 TokenType.OPERATOR, 540 TokenType.ORDINALITY, 541 TokenType.OVERLAPS, 542 TokenType.OVERWRITE, 543 TokenType.PARTITION, 544 TokenType.PERCENT, 545 TokenType.PIVOT, 546 TokenType.PRAGMA, 547 TokenType.PUT, 548 TokenType.RANGE, 549 TokenType.RECURSIVE, 550 TokenType.REFERENCES, 551 TokenType.REFRESH, 552 TokenType.RENAME, 553 TokenType.REPLACE, 554 TokenType.RIGHT, 555 TokenType.ROLLUP, 556 TokenType.ROW, 557 TokenType.ROWS, 558 TokenType.SEMI, 559 TokenType.SET, 560 TokenType.SETTINGS, 561 TokenType.SHOW, 562 TokenType.TEMPORARY, 563 TokenType.TOP, 564 TokenType.TRUE, 565 
TokenType.TRUNCATE, 566 TokenType.UNIQUE, 567 TokenType.UNNEST, 568 TokenType.UNPIVOT, 569 TokenType.UPDATE, 570 TokenType.USE, 571 TokenType.VOLATILE, 572 TokenType.WINDOW, 573 *ALTERABLES, 574 *CREATABLES, 575 *SUBQUERY_PREDICATES, 576 *TYPE_TOKENS, 577 *NO_PAREN_FUNCTIONS, 578 } 579 ID_VAR_TOKENS.remove(TokenType.UNION) 580 581 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 582 TokenType.ANTI, 583 TokenType.ASOF, 584 TokenType.FULL, 585 TokenType.LEFT, 586 TokenType.LOCK, 587 TokenType.NATURAL, 588 TokenType.RIGHT, 589 TokenType.SEMI, 590 TokenType.WINDOW, 591 } 592 593 ALIAS_TOKENS = ID_VAR_TOKENS 594 595 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 596 597 ARRAY_CONSTRUCTORS = { 598 "ARRAY": exp.Array, 599 "LIST": exp.List, 600 } 601 602 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 603 604 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 605 606 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 607 608 FUNC_TOKENS = { 609 TokenType.COLLATE, 610 TokenType.COMMAND, 611 TokenType.CURRENT_DATE, 612 TokenType.CURRENT_DATETIME, 613 TokenType.CURRENT_SCHEMA, 614 TokenType.CURRENT_TIMESTAMP, 615 TokenType.CURRENT_TIME, 616 TokenType.CURRENT_USER, 617 TokenType.FILTER, 618 TokenType.FIRST, 619 TokenType.FORMAT, 620 TokenType.GET, 621 TokenType.GLOB, 622 TokenType.IDENTIFIER, 623 TokenType.INDEX, 624 TokenType.ISNULL, 625 TokenType.ILIKE, 626 TokenType.INSERT, 627 TokenType.LIKE, 628 TokenType.MERGE, 629 TokenType.NEXT, 630 TokenType.OFFSET, 631 TokenType.PRIMARY_KEY, 632 TokenType.RANGE, 633 TokenType.REPLACE, 634 TokenType.RLIKE, 635 TokenType.ROW, 636 TokenType.UNNEST, 637 TokenType.VAR, 638 TokenType.LEFT, 639 TokenType.RIGHT, 640 TokenType.SEQUENCE, 641 TokenType.DATE, 642 TokenType.DATETIME, 643 TokenType.TABLE, 644 TokenType.TIMESTAMP, 645 TokenType.TIMESTAMPTZ, 646 TokenType.TRUNCATE, 647 TokenType.UTC_DATE, 648 TokenType.UTC_TIME, 649 TokenType.UTC_TIMESTAMP, 650 TokenType.WINDOW, 651 TokenType.XOR, 652 *TYPE_TOKENS, 653 *SUBQUERY_PREDICATES, 654 } 655 656 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 657 TokenType.AND: exp.And, 658 } 659 660 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 661 TokenType.COLON_EQ: exp.PropertyEQ, 662 } 663 664 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 665 TokenType.OR: exp.Or, 666 } 667 668 EQUALITY = { 669 TokenType.EQ: exp.EQ, 670 TokenType.NEQ: exp.NEQ, 671 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 672 } 673 674 COMPARISON = { 675 TokenType.GT: exp.GT, 676 TokenType.GTE: exp.GTE, 677 TokenType.LT: exp.LT, 678 TokenType.LTE: exp.LTE, 679 } 680 681 BITWISE = { 682 TokenType.AMP: exp.BitwiseAnd, 683 TokenType.CARET: exp.BitwiseXor, 684 TokenType.PIPE: exp.BitwiseOr, 685 } 686 687 TERM = { 688 TokenType.DASH: exp.Sub, 689 TokenType.PLUS: exp.Add, 690 TokenType.MOD: exp.Mod, 691 TokenType.COLLATE: exp.Collate, 692 } 693 694 FACTOR = { 695 TokenType.DIV: exp.IntDiv, 696 TokenType.LR_ARROW: exp.Distance, 697 TokenType.SLASH: exp.Div, 698 TokenType.STAR: exp.Mul, 699 } 700 701 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 702 703 TIMES = { 704 TokenType.TIME, 705 TokenType.TIMETZ, 706 } 707 708 TIMESTAMPS = { 709 TokenType.TIMESTAMP, 710 TokenType.TIMESTAMPNTZ, 711 TokenType.TIMESTAMPTZ, 712 TokenType.TIMESTAMPLTZ, 713 *TIMES, 714 } 715 716 SET_OPERATIONS = { 717 TokenType.UNION, 718 TokenType.INTERSECT, 719 TokenType.EXCEPT, 720 } 721 722 JOIN_METHODS = { 723 TokenType.ASOF, 724 TokenType.NATURAL, 725 TokenType.POSITIONAL, 726 } 727 728 JOIN_SIDES = { 729 TokenType.LEFT, 730 TokenType.RIGHT, 731 
TokenType.FULL, 732 } 733 734 JOIN_KINDS = { 735 TokenType.ANTI, 736 TokenType.CROSS, 737 TokenType.INNER, 738 TokenType.OUTER, 739 TokenType.SEMI, 740 TokenType.STRAIGHT_JOIN, 741 } 742 743 JOIN_HINTS: t.Set[str] = set() 744 745 LAMBDAS = { 746 TokenType.ARROW: lambda self, expressions: self.expression( 747 exp.Lambda, 748 this=self._replace_lambda( 749 self._parse_assignment(), 750 expressions, 751 ), 752 expressions=expressions, 753 ), 754 TokenType.FARROW: lambda self, expressions: self.expression( 755 exp.Kwarg, 756 this=exp.var(expressions[0].name), 757 expression=self._parse_assignment(), 758 ), 759 } 760 761 COLUMN_OPERATORS = { 762 TokenType.DOT: None, 763 TokenType.DOTCOLON: lambda self, this, to: self.expression( 764 exp.JSONCast, 765 this=this, 766 to=to, 767 ), 768 TokenType.DCOLON: lambda self, this, to: self.build_cast( 769 strict=self.STRICT_CAST, this=this, to=to 770 ), 771 TokenType.ARROW: lambda self, this, path: self.expression( 772 exp.JSONExtract, 773 this=this, 774 expression=self.dialect.to_json_path(path), 775 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 776 ), 777 TokenType.DARROW: lambda self, this, path: self.expression( 778 exp.JSONExtractScalar, 779 this=this, 780 expression=self.dialect.to_json_path(path), 781 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 782 ), 783 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtract, 785 this=this, 786 expression=path, 787 ), 788 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 789 exp.JSONBExtractScalar, 790 this=this, 791 expression=path, 792 ), 793 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 794 exp.JSONBContains, 795 this=this, 796 expression=key, 797 ), 798 } 799 800 CAST_COLUMN_OPERATORS = { 801 TokenType.DOTCOLON, 802 TokenType.DCOLON, 803 } 804 805 EXPRESSION_PARSERS = { 806 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 807 exp.Column: lambda self: self._parse_column(), 808 exp.Condition: lambda self: self._parse_assignment(), 809 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 810 exp.Expression: lambda self: self._parse_expression(), 811 exp.From: lambda self: self._parse_from(joins=True), 812 exp.Group: lambda self: self._parse_group(), 813 exp.Having: lambda self: self._parse_having(), 814 exp.Hint: lambda self: self._parse_hint_body(), 815 exp.Identifier: lambda self: self._parse_id_var(), 816 exp.Join: lambda self: self._parse_join(), 817 exp.Lambda: lambda self: self._parse_lambda(), 818 exp.Lateral: lambda self: self._parse_lateral(), 819 exp.Limit: lambda self: self._parse_limit(), 820 exp.Offset: lambda self: self._parse_offset(), 821 exp.Order: lambda self: self._parse_order(), 822 exp.Ordered: lambda self: self._parse_ordered(), 823 exp.Properties: lambda self: self._parse_properties(), 824 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 825 exp.Qualify: lambda self: self._parse_qualify(), 826 exp.Returning: lambda self: self._parse_returning(), 827 exp.Select: lambda self: self._parse_select(), 828 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 829 exp.Table: lambda self: self._parse_table_parts(), 830 exp.TableAlias: lambda self: self._parse_table_alias(), 831 exp.Tuple: lambda self: self._parse_value(values=False), 832 exp.Whens: lambda self: self._parse_when_matched(), 833 exp.Where: lambda self: self._parse_where(), 834 exp.Window: lambda self: self._parse_named_window(), 835 exp.With: lambda self: 
self._parse_with(), 836 "JOIN_TYPE": lambda self: self._parse_join_parts(), 837 } 838 839 STATEMENT_PARSERS = { 840 TokenType.ALTER: lambda self: self._parse_alter(), 841 TokenType.ANALYZE: lambda self: self._parse_analyze(), 842 TokenType.BEGIN: lambda self: self._parse_transaction(), 843 TokenType.CACHE: lambda self: self._parse_cache(), 844 TokenType.COMMENT: lambda self: self._parse_comment(), 845 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 846 TokenType.COPY: lambda self: self._parse_copy(), 847 TokenType.CREATE: lambda self: self._parse_create(), 848 TokenType.DELETE: lambda self: self._parse_delete(), 849 TokenType.DESC: lambda self: self._parse_describe(), 850 TokenType.DESCRIBE: lambda self: self._parse_describe(), 851 TokenType.DROP: lambda self: self._parse_drop(), 852 TokenType.GRANT: lambda self: self._parse_grant(), 853 TokenType.REVOKE: lambda self: self._parse_revoke(), 854 TokenType.INSERT: lambda self: self._parse_insert(), 855 TokenType.KILL: lambda self: self._parse_kill(), 856 TokenType.LOAD: lambda self: self._parse_load(), 857 TokenType.MERGE: lambda self: self._parse_merge(), 858 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 859 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 860 TokenType.REFRESH: lambda self: self._parse_refresh(), 861 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 862 TokenType.SET: lambda self: self._parse_set(), 863 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 864 TokenType.UNCACHE: lambda self: self._parse_uncache(), 865 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 866 TokenType.UPDATE: lambda self: self._parse_update(), 867 TokenType.USE: lambda self: self._parse_use(), 868 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 869 } 870 871 UNARY_PARSERS = { 872 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 873 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 874 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 875 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 876 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 877 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 878 } 879 880 STRING_PARSERS = { 881 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 882 exp.RawString, this=token.text 883 ), 884 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 885 exp.National, this=token.text 886 ), 887 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 888 TokenType.STRING: lambda self, token: self.expression( 889 exp.Literal, this=token.text, is_string=True 890 ), 891 TokenType.UNICODE_STRING: lambda self, token: self.expression( 892 exp.UnicodeString, 893 this=token.text, 894 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 895 ), 896 } 897 898 NUMERIC_PARSERS = { 899 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 900 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 901 TokenType.HEX_STRING: lambda self, token: self.expression( 902 exp.HexString, 903 this=token.text, 904 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 905 ), 906 TokenType.NUMBER: lambda self, token: self.expression( 907 exp.Literal, 
this=token.text, is_string=False 908 ), 909 } 910 911 PRIMARY_PARSERS = { 912 **STRING_PARSERS, 913 **NUMERIC_PARSERS, 914 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 915 TokenType.NULL: lambda self, _: self.expression(exp.Null), 916 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 917 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 918 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 919 TokenType.STAR: lambda self, _: self._parse_star_ops(), 920 } 921 922 PLACEHOLDER_PARSERS = { 923 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 924 TokenType.PARAMETER: lambda self: self._parse_parameter(), 925 TokenType.COLON: lambda self: ( 926 self.expression(exp.Placeholder, this=self._prev.text) 927 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 928 else None 929 ), 930 } 931 932 RANGE_PARSERS = { 933 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 934 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 935 TokenType.GLOB: binary_range_parser(exp.Glob), 936 TokenType.ILIKE: binary_range_parser(exp.ILike), 937 TokenType.IN: lambda self, this: self._parse_in(this), 938 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 939 TokenType.IS: lambda self, this: self._parse_is(this), 940 TokenType.LIKE: binary_range_parser(exp.Like), 941 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 942 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 943 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 944 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 945 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 946 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 947 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 948 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 949 } 950 951 PIPE_SYNTAX_TRANSFORM_PARSERS = { 952 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 953 "AS": lambda self, query: self._build_pipe_cte( 954 query, [exp.Star()], self._parse_table_alias() 955 ), 956 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 957 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 958 "ORDER BY": lambda self, query: query.order_by( 959 self._parse_order(), append=False, copy=False 960 ), 961 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 962 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 963 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 964 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 965 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 966 } 967 968 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 969 "ALLOWED_VALUES": lambda self: self.expression( 970 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 971 ), 972 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 973 "AUTO": lambda self: self._parse_auto_property(), 974 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 975 "BACKUP": lambda self: self.expression( 976 exp.BackupProperty, this=self._parse_var(any_token=True) 977 ), 978 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 979 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 980 "CHARACTER SET": lambda self, 
**kwargs: self._parse_character_set(**kwargs), 981 "CHECKSUM": lambda self: self._parse_checksum(), 982 "CLUSTER BY": lambda self: self._parse_cluster(), 983 "CLUSTERED": lambda self: self._parse_clustered_by(), 984 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 985 exp.CollateProperty, **kwargs 986 ), 987 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 988 "CONTAINS": lambda self: self._parse_contains_property(), 989 "COPY": lambda self: self._parse_copy_property(), 990 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 991 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 992 "DEFINER": lambda self: self._parse_definer(), 993 "DETERMINISTIC": lambda self: self.expression( 994 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 995 ), 996 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 997 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 998 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 999 "DISTKEY": lambda self: self._parse_distkey(), 1000 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1001 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1002 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1003 "ENVIRONMENT": lambda self: self.expression( 1004 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1005 ), 1006 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1007 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1008 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1009 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1010 "FREESPACE": lambda self: self._parse_freespace(), 1011 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1012 "HEAP": lambda self: self.expression(exp.HeapProperty), 1013 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1014 "IMMUTABLE": lambda self: self.expression( 1015 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1016 ), 1017 "INHERITS": lambda self: self.expression( 1018 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1019 ), 1020 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1021 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1022 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1023 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1024 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1025 "LIKE": lambda self: self._parse_create_like(), 1026 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1027 "LOCK": lambda self: self._parse_locking(), 1028 "LOCKING": lambda self: self._parse_locking(), 1029 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1030 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1031 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1032 "MODIFIES": lambda self: self._parse_modifies_property(), 1033 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1034 "NO": lambda self: self._parse_no_property(), 1035 "ON": lambda self: self._parse_on_property(), 1036 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1037 "OUTPUT": lambda self: 
self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1038 "PARTITION": lambda self: self._parse_partitioned_of(), 1039 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1040 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1041 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1042 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1043 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1044 "READS": lambda self: self._parse_reads_property(), 1045 "REMOTE": lambda self: self._parse_remote_with_connection(), 1046 "RETURNS": lambda self: self._parse_returns(), 1047 "STRICT": lambda self: self.expression(exp.StrictProperty), 1048 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1049 "ROW": lambda self: self._parse_row(), 1050 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1051 "SAMPLE": lambda self: self.expression( 1052 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1053 ), 1054 "SECURE": lambda self: self.expression(exp.SecureProperty), 1055 "SECURITY": lambda self: self._parse_security(), 1056 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1057 "SETTINGS": lambda self: self._parse_settings_property(), 1058 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1059 "SORTKEY": lambda self: self._parse_sortkey(), 1060 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1061 "STABLE": lambda self: self.expression( 1062 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1063 ), 1064 "STORED": lambda self: self._parse_stored(), 1065 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1066 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1067 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1068 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1069 "TO": lambda self: self._parse_to_table(), 1070 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1071 "TRANSFORM": lambda self: self.expression( 1072 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1073 ), 1074 "TTL": lambda self: self._parse_ttl(), 1075 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1076 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1077 "VOLATILE": lambda self: self._parse_volatile_property(), 1078 "WITH": lambda self: self._parse_with_property(), 1079 } 1080 1081 CONSTRAINT_PARSERS = { 1082 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1083 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1084 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1085 "CHARACTER SET": lambda self: self.expression( 1086 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1087 ), 1088 "CHECK": lambda self: self.expression( 1089 exp.CheckColumnConstraint, 1090 this=self._parse_wrapped(self._parse_assignment), 1091 enforced=self._match_text_seq("ENFORCED"), 1092 ), 1093 "COLLATE": lambda self: self.expression( 1094 exp.CollateColumnConstraint, 1095 this=self._parse_identifier() or self._parse_column(), 1096 ), 1097 "COMMENT": lambda self: self.expression( 1098 exp.CommentColumnConstraint, this=self._parse_string() 1099 ), 1100 "COMPRESS": lambda self: self._parse_compress(), 1101 "CLUSTERED": lambda self: self.expression( 1102 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 1103 ), 1104 "NONCLUSTERED": lambda self: self.expression( 1105 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "DEFAULT": lambda self: self.expression( 1108 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1109 ), 1110 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1111 "EPHEMERAL": lambda self: self.expression( 1112 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1113 ), 1114 "EXCLUDE": lambda self: self.expression( 1115 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1116 ), 1117 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1118 "FORMAT": lambda self: self.expression( 1119 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1120 ), 1121 "GENERATED": lambda self: self._parse_generated_as_identity(), 1122 "IDENTITY": lambda self: self._parse_auto_increment(), 1123 "INLINE": lambda self: self._parse_inline(), 1124 "LIKE": lambda self: self._parse_create_like(), 1125 "NOT": lambda self: self._parse_not_constraint(), 1126 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1127 "ON": lambda self: ( 1128 self._match(TokenType.UPDATE) 1129 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1130 ) 1131 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1132 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1133 "PERIOD": lambda self: self._parse_period_for_system_time(), 1134 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1135 "REFERENCES": lambda self: self._parse_references(match=False), 1136 "TITLE": lambda self: self.expression( 1137 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1138 ), 1139 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1140 "UNIQUE": lambda self: self._parse_unique(), 1141 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1142 "WATERMARK": lambda self: self.expression( 1143 exp.WatermarkColumnConstraint, 1144 this=self._match(TokenType.FOR) and self._parse_column(), 1145 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1146 ), 1147 "WITH": lambda self: self.expression( 1148 exp.Properties, expressions=self._parse_wrapped_properties() 1149 ), 1150 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 } 1153 1154 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1155 if not self._match(TokenType.L_PAREN, advance=False): 1156 # Partitioning by bucket or truncate follows the syntax: 1157 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1158 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1159 self._retreat(self._index - 1) 1160 return None 1161 1162 klass = ( 1163 exp.PartitionedByBucket 1164 if self._prev.text.upper() == "BUCKET" 1165 else exp.PartitionByTruncate 1166 ) 1167 1168 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1169 this, expression = seq_get(args, 0), seq_get(args, 1) 1170 1171 if isinstance(this, exp.Literal): 1172 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1173 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1174 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1175 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1176 # 1177 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1178 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1179 this, expression = expression, this 1180 1181 return self.expression(klass, this=this, expression=expression) 1182 1183 ALTER_PARSERS = { 1184 "ADD": lambda self: self._parse_alter_table_add(), 1185 "AS": lambda self: self._parse_select(), 1186 "ALTER": lambda self: self._parse_alter_table_alter(), 1187 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1188 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1189 "DROP": lambda self: self._parse_alter_table_drop(), 1190 "RENAME": lambda self: self._parse_alter_table_rename(), 1191 "SET": lambda self: self._parse_alter_table_set(), 1192 "SWAP": lambda self: self.expression( 1193 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1194 ), 1195 } 1196 1197 ALTER_ALTER_PARSERS = { 1198 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1199 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1200 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1201 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1202 } 1203 1204 SCHEMA_UNNAMED_CONSTRAINTS = { 1205 "CHECK", 1206 "EXCLUDE", 1207 "FOREIGN KEY", 1208 "LIKE", 1209 "PERIOD", 1210 "PRIMARY KEY", 1211 "UNIQUE", 1212 "WATERMARK", 1213 "BUCKET", 1214 "TRUNCATE", 1215 } 1216 1217 NO_PAREN_FUNCTION_PARSERS = { 1218 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1219 "CASE": lambda self: self._parse_case(), 1220 "CONNECT_BY_ROOT": lambda self: self.expression( 1221 exp.ConnectByRoot, this=self._parse_column() 1222 ), 1223 "IF": lambda self: self._parse_if(), 1224 } 1225 1226 INVALID_FUNC_NAME_TOKENS = { 1227 TokenType.IDENTIFIER, 1228 TokenType.STRING, 1229 } 1230 1231 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1232 1233 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1234 1235 FUNCTION_PARSERS = { 1236 **{ 1237 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1238 }, 1239 **{ 1240 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1241 }, 1242 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1243 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1244 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1245 "DECODE": lambda self: self._parse_decode(), 1246 "EXTRACT": lambda self: self._parse_extract(), 1247 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1248 "GAP_FILL": lambda self: self._parse_gap_fill(), 1249 "JSON_OBJECT": lambda self: self._parse_json_object(), 1250 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1251 "JSON_TABLE": lambda self: self._parse_json_table(), 1252 "MATCH": lambda self: self._parse_match_against(), 1253 "NORMALIZE": lambda self: self._parse_normalize(), 1254 "OPENJSON": lambda self: self._parse_open_json(), 1255 "OVERLAY": lambda self: self._parse_overlay(), 1256 "POSITION": lambda self: self._parse_position(), 1257 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "STRING_AGG": lambda self: self._parse_string_agg(), 1259 "SUBSTRING": lambda self: self._parse_substring(), 1260 "TRIM": lambda self: self._parse_trim(), 1261 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1262 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1263 "XMLELEMENT": lambda self: self.expression( 1264 exp.XMLElement, 1265 this=self._match_text_seq("NAME") and self._parse_id_var(), 1266 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1267 ), 1268 "XMLTABLE": lambda self: self._parse_xml_table(), 1269 } 1270 1271 QUERY_MODIFIER_PARSERS = { 1272 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1273 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1274 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1275 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1276 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1277 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1278 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1279 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1280 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1281 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1282 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1283 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1284 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1285 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.CLUSTER_BY: lambda self: ( 1288 "cluster", 1289 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1290 ), 1291 TokenType.DISTRIBUTE_BY: lambda self: ( 1292 "distribute", 1293 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1294 ), 1295 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1296 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1297 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1298 } 1299 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1300 1301 SET_PARSERS = { 1302 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1303 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1304 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1305 "TRANSACTION": lambda self: self._parse_set_transaction(), 1306 } 1307 1308 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1309 1310 TYPE_LITERAL_PARSERS = { 1311 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1312 } 1313 1314 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {}
1315
1316     DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
1317
1318     PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
1319
1320     TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
1321     TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
1322         "ISOLATION": (
1323             ("LEVEL", "REPEATABLE", "READ"),
1324             ("LEVEL", "READ", "COMMITTED"),
1325             ("LEVEL", "READ", "UNCOMMITTED"),
1326             ("LEVEL", "SERIALIZABLE"),
1327         ),
1328         "READ": ("WRITE", "ONLY"),
1329     }
1330
1331     CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
1332         ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
1333     )
1334     CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
1335
1336     CREATE_SEQUENCE: OPTIONS_TYPE = {
1337         "SCALE": ("EXTEND", "NOEXTEND"),
1338         "SHARD": ("EXTEND", "NOEXTEND"),
1339         "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
1340         **dict.fromkeys(
1341             (
1342                 "SESSION",
1343                 "GLOBAL",
1344                 "KEEP",
1345                 "NOKEEP",
1346                 "ORDER",
1347                 "NOORDER",
1348                 "NOCACHE",
1349                 "CYCLE",
1350                 "NOCYCLE",
1351                 "NOMINVALUE",
1352                 "NOMAXVALUE",
1353                 "NOSCALE",
1354                 "NOSHARD",
1355             ),
1356             tuple(),
1357         ),
1358     }
1359
1360     ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
1361
1362     USABLES: OPTIONS_TYPE = dict.fromkeys(
1363         ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
1364     )
1365
1366     CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
1367
1368     SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
1369         "TYPE": ("EVOLUTION",),
1370         **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
1371     }
1372
1373     PROCEDURE_OPTIONS: OPTIONS_TYPE = {}
1374
1375     EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())
1376
1377     KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
1378         "NOT": ("ENFORCED",),
1379         "MATCH": (
1380             "FULL",
1381             "PARTIAL",
1382             "SIMPLE",
1383         ),
1384         "INITIALLY": ("DEFERRED", "IMMEDIATE"),
1385         "USING": (
1386             "BTREE",
1387             "HASH",
1388         ),
1389         **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
1390     }
1391
1392     WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
1393         "NO": ("OTHERS",),
1394         "CURRENT": ("ROW",),
1395         **dict.fromkeys(("GROUP", "TIES"), tuple()),
1396     }
1397
1398     INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
1399
1400     CLONE_KEYWORDS = {"CLONE", "COPY"}
1401     HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
1402     HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}
1403
1404     OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}
1405
1406     OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
1407
1408     TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
1409
1410     VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}
1411
1412     WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
1413     WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
1414     WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
1415
1416     JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}
1417
1418     FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}
1419
1420     ADD_CONSTRAINT_TOKENS = {
1421         TokenType.CONSTRAINT,
1422         TokenType.FOREIGN_KEY,
1423         TokenType.INDEX,
1424         TokenType.KEY,
1425         TokenType.PRIMARY_KEY,
1426         TokenType.UNIQUE,
1427     }
1428
1429     DISTINCT_TOKENS = {TokenType.DISTINCT}
1430
1431     UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS
1432
1433     SELECT_START_TOKENS = {TokenType.L_PAREN,
    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
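    # Illustrative sketch (not part of this module): Dialect.get_or_raise accepts
    # a dialect name, class, or instance, so a plain string is enough here, e.g.
    #
    #     >>> from sqlglot.parser import Parser
    #     >>> from sqlglot.errors import ErrorLevel
    #     >>> parser = Parser(error_level=ErrorLevel.RAISE, dialect="duckdb")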
1623 """ 1624 return self._parse( 1625 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1626 ) 1627 1628 def parse_into( 1629 self, 1630 expression_types: exp.IntoType, 1631 raw_tokens: t.List[Token], 1632 sql: t.Optional[str] = None, 1633 ) -> t.List[t.Optional[exp.Expression]]: 1634 """ 1635 Parses a list of tokens into a given Expression type. If a collection of Expression 1636 types is given instead, this method will try to parse the token list into each one 1637 of them, stopping at the first for which the parsing succeeds. 1638 1639 Args: 1640 expression_types: The expression type(s) to try and parse the token list into. 1641 raw_tokens: The list of tokens. 1642 sql: The original SQL string, used to produce helpful debug messages. 1643 1644 Returns: 1645 The target Expression. 1646 """ 1647 errors = [] 1648 for expression_type in ensure_list(expression_types): 1649 parser = self.EXPRESSION_PARSERS.get(expression_type) 1650 if not parser: 1651 raise TypeError(f"No parser registered for {expression_type}") 1652 1653 try: 1654 return self._parse(parser, raw_tokens, sql) 1655 except ParseError as e: 1656 e.errors[0]["into_expression"] = expression_type 1657 errors.append(e) 1658 1659 raise ParseError( 1660 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1661 errors=merge_errors(errors), 1662 ) from errors[-1] 1663 1664 def _parse( 1665 self, 1666 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1667 raw_tokens: t.List[Token], 1668 sql: t.Optional[str] = None, 1669 ) -> t.List[t.Optional[exp.Expression]]: 1670 self.reset() 1671 self.sql = sql or "" 1672 1673 total = len(raw_tokens) 1674 chunks: t.List[t.List[Token]] = [[]] 1675 1676 for i, token in enumerate(raw_tokens): 1677 if token.token_type == TokenType.SEMICOLON: 1678 if token.comments: 1679 chunks.append([token]) 1680 1681 if i < total - 1: 1682 chunks.append([]) 1683 else: 1684 chunks[-1].append(token) 1685 1686 expressions = [] 1687 1688 for tokens in chunks: 1689 self._index = -1 1690 self._tokens = tokens 1691 self._advance() 1692 1693 expressions.append(parse_method(self)) 1694 1695 if self._index < len(self._tokens): 1696 self.raise_error("Invalid expression / Unexpected token") 1697 1698 self.check_errors() 1699 1700 return expressions 1701 1702 def check_errors(self) -> None: 1703 """Logs or raises any found errors, depending on the chosen error level setting.""" 1704 if self.error_level == ErrorLevel.WARN: 1705 for error in self.errors: 1706 logger.error(str(error)) 1707 elif self.error_level == ErrorLevel.RAISE and self.errors: 1708 raise ParseError( 1709 concat_messages(self.errors, self.max_errors), 1710 errors=merge_errors(self.errors), 1711 ) 1712 1713 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1714 """ 1715 Appends an error in the list of recorded errors or raises it, depending on the chosen 1716 error level setting. 1717 """ 1718 token = token or self._curr or self._prev or Token.string("") 1719 start = token.start 1720 end = token.end + 1 1721 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1722 highlight = self.sql[start:end] 1723 end_context = self.sql[end : end + self.error_message_context] 1724 1725 error = ParseError.new( 1726 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
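    # The parser backtracks by saving and restoring the token index; helpers in
    # this class use the pattern below, which _try_parse automates (a simplified
    # sketch, with _parse_something standing in for any parse attempt):
    #
    #     index = self._index
    #     node = self._parse_something()  # hypothetical parse attempt
    #     if node is None:
    #         self._retreat(index)        # rewind to the saved position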
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
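    # Illustrative sketch (not part of this module): CREATE statements produce
    # exp.Create nodes, with OR REPLACE captured in the "replace" arg, e.g.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("CREATE OR REPLACE TABLE t (a INT)")
    #     >>> type(node).__name__, node.args.get("replace")
    #     ('Create', True)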
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
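    # Illustrative sketch (not part of this module): sequence options collapse
    # into a single exp.SequenceProperties node, e.g.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("CREATE SEQUENCE seq START WITH 1 INCREMENT BY 2")
    #     >>> node.find(sqlglot.exp.SequenceProperties) is not None
    #     True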
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)
    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
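    # Illustrative sketch (not part of this module): the system versioning parser
    # above handles T-SQL's WITH (SYSTEM_VERSIONING = ON (...)) clause, e.g.
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE TABLE t (a INT) WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.h))"
    #     >>> node = sqlglot.parse_one(sql, read="tsql")
    #     >>> node.find(sqlglot.exp.WithSystemVersioningProperty) is not None
    #     True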
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
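    # Illustrative sketch (not part of this module): the Postgres partition-bound
    # parsers above turn FOR VALUES clauses into exp.PartitionBoundSpec, e.g.
    #
    #     >>> import sqlglot
    #     >>> sql = "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)"
    #     >>> node = sqlglot.parse_one(sql, read="postgres")
    #     >>> node.find(sqlglot.exp.PartitionBoundSpec) is not None
    #     True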
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )
    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
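    # Illustrative sketch (not part of this module): Oracle-style multi-table
    # inserts produce exp.MultitableInserts, e.g.
    #
    #     >>> import sqlglot
    #     >>> sql = "INSERT ALL INTO t1 VALUES (1) INTO t2 VALUES (2) SELECT * FROM src"
    #     >>> node = sqlglot.parse_one(sql, read="oracle")
    #     >>> type(node).__name__
    #     'MultitableInserts'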
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
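    # Illustrative sketch (not part of this module): upsert clauses attach an
    # exp.OnConflict node to the insert, e.g.
    #
    #     >>> import sqlglot
    #     >>> sql = "INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO UPDATE SET id = 2"
    #     >>> node = sqlglot.parse_one(sql, read="postgres")
    #     >>> node.args["conflict"] is not None
    #     True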
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
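    # Illustrative sketch (not part of this module): MySQL's multiple-table DELETE
    # fills the "tables" arg of exp.Delete, e.g.
    #
    #     >>> import sqlglot
    #     >>> sql = "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x > 0"
    #     >>> node = sqlglot.parse_one(sql, read="mysql")
    #     >>> [t.name for t in node.args["tables"]]
    #     ['t1']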
    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None
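    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The unparenthesized fallback above wraps each scalar in its own
    # single-element Tuple, so `VALUES 1, 2` yields one column and two rows in
    # dialects that accept the bare form:
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> values = sqlglot.parse_one("SELECT * FROM (VALUES 1, 2) AS v(x)").find(exp.Values)
    # >>> len(values.expressions)  # assumed: 2, one Tuple per row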
    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from"):
                    select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                query = exp.select("*").from_(from_)

            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query
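    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The FROM-first handling above lets DuckDB-style queries parse with
    # or without a trailing SELECT; a bare FROM is expanded to SELECT *:
    #
    # >>> import sqlglot
    # >>> sqlglot.transpile("FROM tbl SELECT x", read="duckdb")  # assumed: SELECT x FROM tbl
    # >>> sqlglot.transpile("FROM tbl", read="duckdb")           # assumed: SELECT * FROM tbl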
    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
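    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The rewrite at the end of _parse_cte means a CTE whose body is a
    # bare VALUES list is normalized into a SELECT * over those values (aliased
    # "_values" when the VALUES node itself carries no alias), so downstream logic
    # only ever sees a query:
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> sqlglot.parse_one("WITH v(x) AS (VALUES (1), (2)) SELECT x FROM v").find(exp.CTE).this
    # (assumed: an exp.Select reading from the VALUES expression)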
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
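    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. On dialects that set SUPPORTS_IMPLICIT_UNNEST (e.g. BigQuery), the
    # helper above rewrites a comma join against a path rooted at a known table
    # reference into an explicit UNNEST; the exact output below is an assumption:
    #
    # >>> import sqlglot
    # >>> sqlglot.transpile("SELECT x FROM t, t.arr", read="bigquery", write="presto")
    # (assumed: the second "table" is emitted as a CROSS JOIN UNNEST(t.arr))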
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )
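    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. _parse_hint relies on the tokenizer attaching the /*+ ... */ comment
    # to the HINT token, which is then re-parsed into an exp.Hint. On a dialect
    # with hint support, for example:
    #
    # >>> import sqlglot
    # >>> sqlglot.parse_one("SELECT /*+ BROADCAST(t) */ x FROM t", read="spark").args.get("hint")
    # (assumed: an exp.Hint wrapping the BROADCAST(t) call)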
    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
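    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. _parse_lateral unifies T-SQL's CROSS/OUTER APPLY with LATERAL:
    # cross_apply is True for CROSS APPLY, False for OUTER APPLY, and None when
    # the LATERAL keyword was used instead.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> sqlglot.parse_one(
    # ...     "SELECT * FROM t CROSS APPLY (SELECT * FROM u WHERE u.id = t.id) AS s",
    # ...     read="tsql",
    # ... ).find(exp.Lateral)  # assumed: cross_apply=True on the resulting node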
    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        return self.expression(exp.Join, comments=comments, **kwargs)
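    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. A leading comma is treated as an implicit join (marked CROSS when
    # JOINS_HAVE_EQUAL_PRECEDENCE is set), and JOIN ... USING goes through
    # _parse_using_identifiers, which strips single-part columns down to their
    # identifiers:
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> sqlglot.parse_one("SELECT * FROM a JOIN b USING (x, y)").find(exp.Join).args["using"]
    # (assumed: a list of two exp.Identifier nodes)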
        return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
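    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The two hint families parsed above are T-SQL's WITH (...) table
    # hints and MySQL's index hints:
    #
    # >>> import sqlglot
    # >>> sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql")
    # >>> sqlglot.parse_one("SELECT * FROM t USE INDEX (idx_a)", read="mysql")
    # (assumed: the table node carries exp.WithTableHint / exp.IndexTableHint
    # entries in its "hints" arg)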
    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
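    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. Successive dots shift previously parsed parts into db and catalog,
    # and T-SQL's empty-part form `a..b` is kept as an empty identifier so it can
    # round-trip:
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    # >>> tbl.catalog, tbl.db, tbl.name  # assumed: ('c', 'd', 't')
    # >>> sqlglot.transpile("SELECT * FROM a..b", read="tsql", write="tsql")
    # (assumed: the double dot is preserved)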
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )
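    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. _parse_historical_data backs Snowflake's time-travel clauses; the
    # FARROW token is the `=>` in e.g. AT(TIMESTAMP => ...):
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> sqlglot.parse_one(
    # ...     "SELECT * FROM t AT(TIMESTAMP => '2024-01-01'::TIMESTAMP)",
    # ...     read="snowflake",
    # ... ).find(exp.Table).args.get("when")
    # (assumed: an exp.HistoricalData with this='AT', kind='TIMESTAMP')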
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)
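    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The same method handles both percentage and row-count samples, with
    # the dialect's TABLESAMPLE_SIZE_IS_PERCENT flag deciding how a bare number is
    # interpreted:
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)", read="tsql").find(exp.TableSample)
    # >>> sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (100 ROWS)", read="tsql").find(exp.TableSample)
    # (assumed: the first sets "percent", the second sets "size")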
    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)
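    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The simplified form maps DuckDB's statement-level PIVOT/UNPIVOT onto
    # the same exp.Pivot node used for the SQL-standard flavor:
    #
    # >>> import sqlglot
    # >>> sqlglot.parse_one("PIVOT cities ON year USING sum(population)", read="duckdb")
    # (assumed: an exp.Pivot whose `this` is the cities table, with ON expressions
    # and USING aggregations attached)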
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]
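    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The itertools.product walk above is what precomputes output column
    # names for pivots with several FOR fields; two IN-lists plus an aliased
    # aggregation yield their cross product:
    #
    # >>> import itertools
    # >>> ["_".join(p) for p in itertools.product(["2000", "2010"], ["NL", "US"], ["total"])]
    # ['2000_NL_total', '2000_US_total', '2010_NL_total', '2010_US_total']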
    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None
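    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The GROUP BY loop above accumulates plain expressions alongside any
    # number of ROLLUP / CUBE / GROUPING SETS clauses into a single exp.Group:
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> g = sqlglot.parse_one(
    # ...     "SELECT a, b FROM t GROUP BY GROUPING SETS ((a), (a, b), ())"
    # ... ).find(exp.Group)
    # >>> g.args.get("grouping_sets")  # assumed: a list with one exp.GroupingSets node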
    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)
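    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. When a query does not say NULLS FIRST/LAST, _parse_ordered fills in
    # nulls_first from the dialect's NULL_ORDERING, which is what lets ORDER BY
    # transpile between engines that sort nulls small (e.g. MySQL) and ones that
    # sort them large (e.g. Postgres):
    #
    # >>> import sqlglot
    # >>> sqlglot.transpile("SELECT x FROM t ORDER BY x", read="mysql", write="postgres")
    # (assumed: the null ordering is made explicit where the semantics would
    # otherwise change)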
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks
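    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. The comma branch above is MySQL's `LIMIT offset, count` shorthand,
    # which the modifier plumbing in _parse_query_modifiers later splits into
    # separate Limit and Offset nodes:
    #
    # >>> import sqlglot
    # >>> sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")
    # (assumed: roughly 'SELECT x FROM t LIMIT 10 OFFSET 5')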
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
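    # NOTE: illustrative sketch, added for exposition; not part of the upstream
    # module. When neither DISTINCT nor ALL is written, the dialect's
    # SET_OP_DISTINCT_BY_DEFAULT table above decides the semantics, so a bare
    # UNION can be made explicit downstream:
    #
    # >>> import sqlglot
    # >>> sqlglot.parse_one("SELECT a FROM x UNION SELECT a FROM y").args.get("distinct")
    # (assumed: True, since UNION defaults to DISTINCT in standard SQL)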

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
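
    # Illustrative note: "x BETWEEN SYMMETRIC 1 AND 10" becomes exp.Between(this=x, low=1,
    # high=10, symmetric=True), and "x IN (1, 2)" becomes exp.In(this=x, expressions=[...]),
    # while a lone subquery is stored under In's "query" arg (see _parse_in above).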

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
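
    # Illustrative note: per the canonicalization in _parse_interval above, INTERVAL 5 DAY and
    # INTERVAL '5 day' both become Interval(this='5', unit=Var(DAY)), and chained literals like
    # INTERVAL '1' YEAR '2' MONTH are folded into a sum of intervals (exp.Add).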

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
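
    # Illustrative note: the "typed" and "safe" flags set on exp.Div in _parse_factor above
    # record the source dialect's division semantics (integer-preserving division and NULL on
    # division by zero, respectively) so that x / y can be transpiled faithfully.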

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)
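
    # Illustrative usage (assumes the public sqlglot API, not part of this module):
    #   import sqlglot
    #   sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
    # The inline constructor is canonicalized to CAST(<values> AS <type>), which is what the
    # exp.Cast branch at the top of _parse_type above then handles.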

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
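
    # Illustrative note: INT[3] (or the Postgres synonym INT ARRAY[3]) becomes a nested ARRAY
    # DataType with values=[3], whereas for dialects without fixed-size arrays a trailing
    # ARRAY[1] is unconsumed (see the retreat above) and later parsed as an exp.Array value.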

    def _parse_vector_expressions(
        self, expressions: t.List[exp.Expression]
    ) -> t.List[exp.Expression]:
        return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]]

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column
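
    # Illustrative note: "ts AT TIME ZONE 'UTC'" becomes exp.AtTimeZone(this=ts, zone='UTC'),
    # and when the dialect supports Oracle-style (+) markers, _parse_column above records the
    # marker under the column's "join_mark" arg.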

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this
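
    # Illustrative note: for Snowflake, col:a.b::INT parses into Cast(JSONExtract(col, <path>)):
    # the while-loop above peels casts off the parsed path so that :: applies to the extracted
    # value, never to the JSON path itself.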
col:"a'b") as 5652 # it'll roundtrip to a string literal in GET_PATH 5653 if isinstance(path, exp.Identifier) and path.quoted: 5654 escape = True 5655 5656 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5657 5658 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5659 # Databricks transforms it back to the colon/dot notation 5660 if json_path: 5661 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5662 5663 if json_path_expr: 5664 json_path_expr.set("escape", escape) 5665 5666 this = self.expression( 5667 exp.JSONExtract, 5668 this=this, 5669 expression=json_path_expr, 5670 variant_extract=True, 5671 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5672 ) 5673 5674 while casts: 5675 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5676 5677 return this 5678 5679 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5680 return self._parse_types() 5681 5682 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5683 this = self._parse_bracket(this) 5684 5685 while self._match_set(self.COLUMN_OPERATORS): 5686 op_token = self._prev.token_type 5687 op = self.COLUMN_OPERATORS.get(op_token) 5688 5689 if op_token in self.CAST_COLUMN_OPERATORS: 5690 field = self._parse_dcolon() 5691 if not field: 5692 self.raise_error("Expected type") 5693 elif op and self._curr: 5694 field = self._parse_column_reference() or self._parse_bracket() 5695 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5696 field = self._parse_column_ops(field) 5697 else: 5698 field = self._parse_field(any_token=True, anonymous_func=True) 5699 5700 # Function calls can be qualified, e.g., x.y.FOO() 5701 # This converts the final AST to a series of Dots leading to the function call 5702 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5703 if isinstance(field, (exp.Func, exp.Window)) and this: 5704 this = this.transform( 5705 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5706 ) 5707 5708 if op: 5709 this = op(self, this, field) 5710 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5711 this = self.expression( 5712 exp.Column, 5713 comments=this.comments, 5714 this=field, 5715 table=this.this, 5716 db=this.args.get("table"), 5717 catalog=this.args.get("db"), 5718 ) 5719 elif isinstance(field, exp.Window): 5720 # Move the exp.Dot's to the window's function 5721 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5722 field.set("this", window_func) 5723 this = field 5724 else: 5725 this = self.expression(exp.Dot, this=this, expression=field) 5726 5727 if field and field.comments: 5728 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5729 5730 this = self._parse_bracket(this) 5731 5732 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5733 5734 def _parse_paren(self) -> t.Optional[exp.Expression]: 5735 if not self._match(TokenType.L_PAREN): 5736 return None 5737 5738 comments = self._prev_comments 5739 query = self._parse_select() 5740 5741 if query: 5742 expressions = [query] 5743 else: 5744 expressions = self._parse_expressions() 5745 5746 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5747 5748 if not this and self._match(TokenType.R_PAREN, advance=False): 5749 this = self.expression(exp.Tuple) 5750 elif isinstance(this, 

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]:
        return self._parse_csv(lambda: self._parse_lambda(alias=alias))
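
    # Illustrative note: the ODBC/JDBC escape "{fn CONCAT('a', 'b')}" is handled by
    # _parse_function above, which strips the {fn ...} wrapper before delegating to
    # _parse_function_call below.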

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed
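
    # Illustrative note: in _parse_function_call above, an unknown call FOO(1, 2) becomes
    # exp.Anonymous(this="FOO", expressions=[...]), and appending /* sqlglot.anonymous */ after
    # a known function forces the same treatment.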

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
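
    # Illustrative note: lambda syntax such as "(x, y) -> x + y" is matched via self.LAMBDAS in
    # _parse_lambda above; if no arrow follows, the method retreats and falls back to parsing a
    # DISTINCT clause or a plain expression.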

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))
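
    # Illustrative note: in _parse_column_def above, "c INT AS (a + b) STORED" yields a
    # ComputedColumnConstraint with persisted=True, while ClickHouse-style "c MATERIALIZED expr"
    # takes the first computed-column branch.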

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
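
    # Illustrative note: after a NOT token, _parse_not_constraint above resolves NOT NULL,
    # NOT CASESPECIFIC and NOT FOR REPLICATION; any other continuation unconsumes the NOT via
    # _retreat so it can be parsed as an ordinary expression.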

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
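
    # Illustrative note: "CONSTRAINT pk PRIMARY KEY (id)" parses into exp.Constraint(this=pk,
    # expressions=[...]) via _parse_constraint above, while unnamed constraints fall through to
    # _parse_unnamed_constraint.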

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
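
    # Illustrative note: a column-level PRIMARY KEY yields exp.PrimaryKeyColumnConstraint,
    # whereas the table-level form "PRIMARY KEY (a, b)" yields exp.PrimaryKey with one
    # expression per key part (see _parse_primary_key above).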

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this
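
    # Illustrative note: in _parse_bracket above, "arr[1]" becomes an exp.Bracket with the
    # dialect's INDEX_OFFSET applied, "{'a': 1}" becomes a DuckDB-style exp.Struct, and
    # "{d '2024-01-01'}" is dispatched to _parse_odbc_datetime_literal.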
= exp.column("interval") 6521 else: 6522 self.raise_error("Expected END after CASE", self._prev) 6523 6524 return self.expression( 6525 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6526 ) 6527 6528 def _parse_if(self) -> t.Optional[exp.Expression]: 6529 if self._match(TokenType.L_PAREN): 6530 args = self._parse_csv( 6531 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6532 ) 6533 this = self.validate_expression(exp.If.from_arg_list(args), args) 6534 self._match_r_paren() 6535 else: 6536 index = self._index - 1 6537 6538 if self.NO_PAREN_IF_COMMANDS and index == 0: 6539 return self._parse_as_command(self._prev) 6540 6541 condition = self._parse_assignment() 6542 6543 if not condition: 6544 self._retreat(index) 6545 return None 6546 6547 self._match(TokenType.THEN) 6548 true = self._parse_assignment() 6549 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6550 self._match(TokenType.END) 6551 this = self.expression(exp.If, this=condition, true=true, false=false) 6552 6553 return this 6554 6555 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6556 if not self._match_text_seq("VALUE", "FOR"): 6557 self._retreat(self._index - 1) 6558 return None 6559 6560 return self.expression( 6561 exp.NextValueFor, 6562 this=self._parse_column(), 6563 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6564 ) 6565 6566 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6567 this = self._parse_function() or self._parse_var_or_string(upper=True) 6568 6569 if self._match(TokenType.FROM): 6570 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6571 6572 if not self._match(TokenType.COMMA): 6573 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6574 6575 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6576 6577 def _parse_gap_fill(self) -> exp.GapFill: 6578 self._match(TokenType.TABLE) 6579 this = self._parse_table() 6580 6581 self._match(TokenType.COMMA) 6582 args = [this, *self._parse_csv(self._parse_lambda)] 6583 6584 gap_fill = exp.GapFill.from_arg_list(args) 6585 return self.validate_expression(gap_fill, args) 6586 6587 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6588 this = self._parse_assignment() 6589 6590 if not self._match(TokenType.ALIAS): 6591 if self._match(TokenType.COMMA): 6592 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6593 6594 self.raise_error("Expected AS after CAST") 6595 6596 fmt = None 6597 to = self._parse_types() 6598 6599 default = self._match(TokenType.DEFAULT) 6600 if default: 6601 default = self._parse_bitwise() 6602 self._match_text_seq("ON", "CONVERSION", "ERROR") 6603 6604 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6605 fmt_string = self._parse_string() 6606 fmt = self._parse_at_time_zone(fmt_string) 6607 6608 if not to: 6609 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6610 if to.this in exp.DataType.TEMPORAL_TYPES: 6611 this = self.expression( 6612 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6613 this=this, 6614 format=exp.Literal.string( 6615 format_time( 6616 fmt_string.this if fmt_string else "", 6617 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6618 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6619 ) 6620 ), 6621 safe=safe, 6622 ) 6623 6624 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6625 this.set("zone", 
fmt.args["zone"]) 6626 return this 6627 elif not to: 6628 self.raise_error("Expected TYPE after CAST") 6629 elif isinstance(to, exp.Identifier): 6630 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6631 elif to.this == exp.DataType.Type.CHAR: 6632 if self._match(TokenType.CHARACTER_SET): 6633 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6634 6635 return self.build_cast( 6636 strict=strict, 6637 this=this, 6638 to=to, 6639 format=fmt, 6640 safe=safe, 6641 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6642 default=default, 6643 ) 6644 6645 def _parse_string_agg(self) -> exp.GroupConcat: 6646 if self._match(TokenType.DISTINCT): 6647 args: t.List[t.Optional[exp.Expression]] = [ 6648 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6649 ] 6650 if self._match(TokenType.COMMA): 6651 args.extend(self._parse_csv(self._parse_assignment)) 6652 else: 6653 args = self._parse_csv(self._parse_assignment) # type: ignore 6654 6655 if self._match_text_seq("ON", "OVERFLOW"): 6656 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6657 if self._match_text_seq("ERROR"): 6658 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6659 else: 6660 self._match_text_seq("TRUNCATE") 6661 on_overflow = self.expression( 6662 exp.OverflowTruncateBehavior, 6663 this=self._parse_string(), 6664 with_count=( 6665 self._match_text_seq("WITH", "COUNT") 6666 or not self._match_text_seq("WITHOUT", "COUNT") 6667 ), 6668 ) 6669 else: 6670 on_overflow = None 6671 6672 index = self._index 6673 if not self._match(TokenType.R_PAREN) and args: 6674 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6675 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6676 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6677 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6678 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6679 6680 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6681 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6682 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6683 if not self._match_text_seq("WITHIN", "GROUP"): 6684 self._retreat(index) 6685 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6686 6687 # The corresponding match_r_paren will be called in parse_function (caller) 6688 self._match_l_paren() 6689 6690 return self.expression( 6691 exp.GroupConcat, 6692 this=self._parse_order(this=seq_get(args, 0)), 6693 separator=seq_get(args, 1), 6694 on_overflow=on_overflow, 6695 ) 6696 6697 def _parse_convert( 6698 self, strict: bool, safe: t.Optional[bool] = None 6699 ) -> t.Optional[exp.Expression]: 6700 this = self._parse_bitwise() 6701 6702 if self._match(TokenType.USING): 6703 to: t.Optional[exp.Expression] = self.expression( 6704 exp.CharacterSet, this=self._parse_var() 6705 ) 6706 elif self._match(TokenType.COMMA): 6707 to = self._parse_types() 6708 else: 6709 to = None 6710 6711 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6712 6713 def _parse_xml_table(self) -> exp.XMLTable: 6714 namespaces = None 6715 passing = None 6716 columns = None 6717 6718 if self._match_text_seq("XMLNAMESPACES", "("): 6719 namespaces = self._parse_xml_namespace() 6720 self._match_text_seq(")", ",") 6721 6722 this = self._parse_string() 6723 6724 if self._match_text_seq("PASSING"): 6725 # The BY VALUE keywords are optional and are provided for semantic clarity 6726 self._match_text_seq("BY", "VALUE") 6727 passing = self._parse_csv(self._parse_column) 6728 6729 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6730 6731 if self._match_text_seq("COLUMNS"): 6732 columns = self._parse_csv(self._parse_field_def) 6733 6734 return self.expression( 6735 exp.XMLTable, 6736 this=this, 6737 namespaces=namespaces, 6738 passing=passing, 6739 columns=columns, 6740 by_ref=by_ref, 6741 ) 6742 6743 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6744 namespaces = [] 6745 6746 while True: 6747 if self._match(TokenType.DEFAULT): 6748 uri = self._parse_string() 6749 else: 6750 uri = self._parse_alias(self._parse_string()) 6751 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6752 if not self._match(TokenType.COMMA): 6753 break 6754 6755 return namespaces 6756 6757 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6758 args = self._parse_csv(self._parse_assignment) 6759 6760 if len(args) < 3: 6761 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6762 6763 return self.expression(exp.DecodeCase, expressions=args) 6764 6765 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6766 self._match_text_seq("KEY") 6767 key = self._parse_column() 6768 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6769 self._match_text_seq("VALUE") 6770 value = self._parse_bitwise() 6771 6772 if not key and not value: 6773 return None 6774 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6775 6776 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6777 if not this or not self._match_text_seq("FORMAT", "JSON"): 6778 return this 6779 6780 return self.expression(exp.FormatJson, this=this) 6781 6782 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6783 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6784 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6785 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6786 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6787 else: 6788 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6789 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6790 6791 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6792 6793 if not empty and not error and not null: 6794 return None 6795 6796 return self.expression( 6797 exp.OnCondition, 6798 empty=empty, 6799 error=error, 6800 null=null, 6801 ) 6802 6803 def _parse_on_handling( 6804 self, on: str, *values: str 6805 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6806 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6807 for value in values: 6808 if self._match_text_seq(value, "ON", on): 6809 return f"{value} ON {on}" 6810 6811 index = self._index 6812 if self._match(TokenType.DEFAULT): 6813 default_value = self._parse_bitwise() 6814 if self._match_text_seq("ON", on): 6815 return default_value 6816 6817 self._retreat(index) 6818 6819 return None 6820 6821 @t.overload 6822 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6823 6824 @t.overload 6825 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6826 6827 def _parse_json_object(self, agg=False): 6828 star = self._parse_star() 6829 expressions = ( 6830 [star] 6831 if star 6832 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6833 ) 6834 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6835 6836 unique_keys = None 6837 if self._match_text_seq("WITH", "UNIQUE"): 6838 unique_keys = True 6839 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6840 unique_keys = False 6841 6842 self._match_text_seq("KEYS") 6843 6844 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6845 self._parse_type() 6846 ) 6847 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6848 6849 return self.expression( 6850 exp.JSONObjectAgg if agg else exp.JSONObject, 6851 expressions=expressions, 6852 null_handling=null_handling, 6853 unique_keys=unique_keys, 6854 return_type=return_type, 6855 encoding=encoding, 6856 ) 6857 6858 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6859 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6860 if not self._match_text_seq("NESTED"): 6861 this = self._parse_id_var() 6862 kind = self._parse_types(allow_identifiers=False) 6863 nested = None 6864 else: 6865 this = None 6866 kind = None 6867 nested = True 6868 6869 path = self._match_text_seq("PATH") and self._parse_string() 6870 nested_schema = nested and self._parse_json_schema() 6871 6872 return self.expression( 6873 exp.JSONColumnDef, 6874 this=this, 6875 kind=kind, 6876 path=path, 6877 nested_schema=nested_schema, 6878 ) 6879 6880 def _parse_json_schema(self) -> exp.JSONSchema: 6881 self._match_text_seq("COLUMNS") 6882 return self.expression( 6883 exp.JSONSchema, 6884 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6885 ) 6886 6887 def _parse_json_table(self) -> exp.JSONTable: 6888 this = self._parse_format_json(self._parse_bitwise()) 6889 path = self._match(TokenType.COMMA) and self._parse_string() 6890 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6891 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6892 schema =
self._parse_json_schema() 6893 6894 return exp.JSONTable( 6895 this=this, 6896 schema=schema, 6897 path=path, 6898 error_handling=error_handling, 6899 empty_handling=empty_handling, 6900 ) 6901 6902 def _parse_match_against(self) -> exp.MatchAgainst: 6903 if self._match_text_seq("TABLE"): 6904 # parse SingleStore MATCH(TABLE ...) syntax 6905 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6906 expressions = [] 6907 table = self._parse_table() 6908 if table: 6909 expressions = [table] 6910 else: 6911 expressions = self._parse_csv(self._parse_column) 6912 6913 self._match_text_seq(")", "AGAINST", "(") 6914 6915 this = self._parse_string() 6916 6917 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6918 modifier = "IN NATURAL LANGUAGE MODE" 6919 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6920 modifier = f"{modifier} WITH QUERY EXPANSION" 6921 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6922 modifier = "IN BOOLEAN MODE" 6923 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6924 modifier = "WITH QUERY EXPANSION" 6925 else: 6926 modifier = None 6927 6928 return self.expression( 6929 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6930 ) 6931 6932 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6933 def _parse_open_json(self) -> exp.OpenJSON: 6934 this = self._parse_bitwise() 6935 path = self._match(TokenType.COMMA) and self._parse_string() 6936 6937 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6938 this = self._parse_field(any_token=True) 6939 kind = self._parse_types() 6940 path = self._parse_string() 6941 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6942 6943 return self.expression( 6944 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6945 ) 6946 6947 expressions = None 6948 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6949 self._match_l_paren() 6950 expressions = self._parse_csv(_parse_open_json_column_def) 6951 6952 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6953 6954 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6955 args = self._parse_csv(self._parse_bitwise) 6956 6957 if self._match(TokenType.IN): 6958 return self.expression( 6959 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6960 ) 6961 6962 if haystack_first: 6963 haystack = seq_get(args, 0) 6964 needle = seq_get(args, 1) 6965 else: 6966 haystack = seq_get(args, 1) 6967 needle = seq_get(args, 0) 6968 6969 return self.expression( 6970 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6971 ) 6972 6973 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6974 args = self._parse_csv(self._parse_table) 6975 return exp.JoinHint(this=func_name.upper(), expressions=args) 6976 6977 def _parse_substring(self) -> exp.Substring: 6978 # Postgres supports the form: substring(string [from int] [for int]) 6979 # (despite being undocumented, the reverse order also works) 6980 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6981 6982 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6983 6984 start, length = None, None 6985 6986 while self._curr: 6987 if self._match(TokenType.FROM): 6988 start = self._parse_bitwise() 6989 elif self._match(TokenType.FOR): 6990 if not start: 6991 start = exp.Literal.number(1) 6992 length = self._parse_bitwise() 6993 
else: 6994 break 6995 6996 if start: 6997 args.append(start) 6998 if length: 6999 args.append(length) 7000 7001 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7002 7003 def _parse_trim(self) -> exp.Trim: 7004 # https://www.w3resource.com/sql/character-functions/trim.php 7005 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7006 7007 position = None 7008 collation = None 7009 expression = None 7010 7011 if self._match_texts(self.TRIM_TYPES): 7012 position = self._prev.text.upper() 7013 7014 this = self._parse_bitwise() 7015 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7016 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7017 expression = self._parse_bitwise() 7018 7019 if invert_order: 7020 this, expression = expression, this 7021 7022 if self._match(TokenType.COLLATE): 7023 collation = self._parse_bitwise() 7024 7025 return self.expression( 7026 exp.Trim, this=this, position=position, expression=expression, collation=collation 7027 ) 7028 7029 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7030 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7031 7032 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7033 return self._parse_window(self._parse_id_var(), alias=True) 7034 7035 def _parse_respect_or_ignore_nulls( 7036 self, this: t.Optional[exp.Expression] 7037 ) -> t.Optional[exp.Expression]: 7038 if self._match_text_seq("IGNORE", "NULLS"): 7039 return self.expression(exp.IgnoreNulls, this=this) 7040 if self._match_text_seq("RESPECT", "NULLS"): 7041 return self.expression(exp.RespectNulls, this=this) 7042 return this 7043 7044 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7045 if self._match(TokenType.HAVING): 7046 self._match_texts(("MAX", "MIN")) 7047 max = self._prev.text.upper() != "MIN" 7048 return self.expression( 7049 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7050 ) 7051 7052 return this 7053 7054 def _parse_window( 7055 self, this: t.Optional[exp.Expression], alias: bool = False 7056 ) -> t.Optional[exp.Expression]: 7057 func = this 7058 comments = func.comments if isinstance(func, exp.Expression) else None 7059 7060 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7061 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7062 if self._match_text_seq("WITHIN", "GROUP"): 7063 order = self._parse_wrapped(self._parse_order) 7064 this = self.expression(exp.WithinGroup, this=this, expression=order) 7065 7066 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7067 self._match(TokenType.WHERE) 7068 this = self.expression( 7069 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7070 ) 7071 self._match_r_paren() 7072 7073 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7074 # Some dialects choose to implement and some do not. 7075 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7076 7077 # There is some code above in _parse_lambda that handles 7078 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7079 7080 # The below changes handle 7081 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
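# A hedged doctest-style sketch of that second form, assuming the documented
# `sqlglot.parse_one` entry point (not part of the original source):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> q = "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t"
#     >>> isinstance(sqlglot.parse_one(q).find(exp.IgnoreNulls), exp.IgnoreNulls)
#     True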
7082 7083 # Oracle allows both formats 7084 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7085 # and Snowflake chose to do the same for familiarity 7086 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7087 if isinstance(this, exp.AggFunc): 7088 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7089 7090 if ignore_respect and ignore_respect is not this: 7091 ignore_respect.replace(ignore_respect.this) 7092 this = self.expression(ignore_respect.__class__, this=this) 7093 7094 this = self._parse_respect_or_ignore_nulls(this) 7095 7096 # bigquery select from window x AS (partition by ...) 7097 if alias: 7098 over = None 7099 self._match(TokenType.ALIAS) 7100 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7101 return this 7102 else: 7103 over = self._prev.text.upper() 7104 7105 if comments and isinstance(func, exp.Expression): 7106 func.pop_comments() 7107 7108 if not self._match(TokenType.L_PAREN): 7109 return self.expression( 7110 exp.Window, 7111 comments=comments, 7112 this=this, 7113 alias=self._parse_id_var(False), 7114 over=over, 7115 ) 7116 7117 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7118 7119 first = self._match(TokenType.FIRST) 7120 if self._match_text_seq("LAST"): 7121 first = False 7122 7123 partition, order = self._parse_partition_and_order() 7124 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7125 7126 if kind: 7127 self._match(TokenType.BETWEEN) 7128 start = self._parse_window_spec() 7129 7130 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7131 exclude = ( 7132 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7133 if self._match_text_seq("EXCLUDE") 7134 else None 7135 ) 7136 7137 spec = self.expression( 7138 exp.WindowSpec, 7139 kind=kind, 7140 start=start["value"], 7141 start_side=start["side"], 7142 end=end.get("value"), 7143 end_side=end.get("side"), 7144 exclude=exclude, 7145 ) 7146 else: 7147 spec = None 7148 7149 self._match_r_paren() 7150 7151 window = self.expression( 7152 exp.Window, 7153 comments=comments, 7154 this=this, 7155 partition_by=partition, 7156 order=order, 7157 spec=spec, 7158 alias=window_alias, 7159 over=over, 7160 first=first, 7161 ) 7162 7163 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
7164 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7165 return self._parse_window(window, alias=alias) 7166 7167 return window 7168 7169 def _parse_partition_and_order( 7170 self, 7171 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7172 return self._parse_partition_by(), self._parse_order() 7173 7174 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7175 self._match(TokenType.BETWEEN) 7176 7177 return { 7178 "value": ( 7179 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7180 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7181 or self._parse_type() 7182 ), 7183 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7184 } 7185 7186 def _parse_alias( 7187 self, this: t.Optional[exp.Expression], explicit: bool = False 7188 ) -> t.Optional[exp.Expression]: 7189 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7190 # so this section tries to parse the clause version and if it fails, it treats the token 7191 # as an identifier (alias) 7192 if self._can_parse_limit_or_offset(): 7193 return this 7194 7195 any_token = self._match(TokenType.ALIAS) 7196 comments = self._prev_comments or [] 7197 7198 if explicit and not any_token: 7199 return this 7200 7201 if self._match(TokenType.L_PAREN): 7202 aliases = self.expression( 7203 exp.Aliases, 7204 comments=comments, 7205 this=this, 7206 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7207 ) 7208 self._match_r_paren(aliases) 7209 return aliases 7210 7211 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7212 self.STRING_ALIASES and self._parse_string_as_identifier() 7213 ) 7214 7215 if alias: 7216 comments.extend(alias.pop_comments()) 7217 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7218 column = this.this 7219 7220 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7221 if not this.comments and column and column.comments: 7222 this.comments = column.pop_comments() 7223 7224 return this 7225 7226 def _parse_id_var( 7227 self, 7228 any_token: bool = True, 7229 tokens: t.Optional[t.Collection[TokenType]] = None, 7230 ) -> t.Optional[exp.Expression]: 7231 expression = self._parse_identifier() 7232 if not expression and ( 7233 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7234 ): 7235 quoted = self._prev.token_type == TokenType.STRING 7236 expression = self._identifier_expression(quoted=quoted) 7237 7238 return expression 7239 7240 def _parse_string(self) -> t.Optional[exp.Expression]: 7241 if self._match_set(self.STRING_PARSERS): 7242 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7243 return self._parse_placeholder() 7244 7245 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7246 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7247 if output: 7248 output.update_positions(self._prev) 7249 return output 7250 7251 def _parse_number(self) -> t.Optional[exp.Expression]: 7252 if self._match_set(self.NUMERIC_PARSERS): 7253 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7254 return self._parse_placeholder() 7255 7256 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7257 if self._match(TokenType.IDENTIFIER): 7258 return self._identifier_expression(quoted=True) 7259 return self._parse_placeholder() 7260 7261 def _parse_var( 7262 self, 7263 any_token: bool = False, 7264 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7265 upper: bool = False, 7266 ) -> t.Optional[exp.Expression]: 7267 if ( 7268 (any_token and self._advance_any()) 7269 or self._match(TokenType.VAR) 7270 or (self._match_set(tokens) if tokens else False) 7271 ): 7272 return self.expression( 7273 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7274 ) 7275 return self._parse_placeholder() 7276 7277 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7278 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7279 self._advance() 7280 return self._prev 7281 return None 7282 7283 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7284 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7285 7286 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7287 return self._parse_primary() or self._parse_var(any_token=True) 7288 7289 def _parse_null(self) -> t.Optional[exp.Expression]: 7290 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7291 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7292 return self._parse_placeholder() 7293 7294 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7295 if self._match(TokenType.TRUE): 7296 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7297 if self._match(TokenType.FALSE): 7298 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7299 return self._parse_placeholder() 7300 7301 def _parse_star(self) -> t.Optional[exp.Expression]: 7302 if self._match(TokenType.STAR): 7303 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7304 return self._parse_placeholder() 7305 7306 def _parse_parameter(self) -> exp.Parameter: 7307 this = self._parse_identifier() or self._parse_primary_or_var() 7308 return self.expression(exp.Parameter, this=this) 7309 7310 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7311 if self._match_set(self.PLACEHOLDER_PARSERS): 7312 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7313 if placeholder: 7314 return placeholder 7315 self._advance(-1) 7316 return None 7317 7318 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7319 if not self._match_texts(keywords): 7320 return None 7321 if self._match(TokenType.L_PAREN, advance=False): 7322 return self._parse_wrapped_csv(self._parse_expression) 7323 7324 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7325 return [expression] if expression else None 7326 7327 def _parse_csv( 7328 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7329 ) -> t.List[exp.Expression]: 7330 parse_result = parse_method() 7331 items = [parse_result] if parse_result is not None else [] 7332 7333 while self._match(sep): 7334 self._add_comments(parse_result) 7335 parse_result = parse_method() 7336 if parse_result is not None: 7337 items.append(parse_result) 7338 7339 return items 7340 7341 def _parse_tokens( 7342 self, parse_method: t.Callable, expressions: t.Dict 7343 ) -> t.Optional[exp.Expression]: 7344 this = parse_method() 7345 7346 while self._match_set(expressions): 7347 this = self.expression( 7348 expressions[self._prev.token_type], 7349 this=this, 7350 comments=self._prev_comments, 7351 expression=parse_method(), 7352 ) 7353 7354 return this 7355 7356 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7357 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7358 7359 def 
_parse_wrapped_csv( 7360 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7361 ) -> t.List[exp.Expression]: 7362 return self._parse_wrapped( 7363 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7364 ) 7365 7366 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7367 wrapped = self._match(TokenType.L_PAREN) 7368 if not wrapped and not optional: 7369 self.raise_error("Expecting (") 7370 parse_result = parse_method() 7371 if wrapped: 7372 self._match_r_paren() 7373 return parse_result 7374 7375 def _parse_expressions(self) -> t.List[exp.Expression]: 7376 return self._parse_csv(self._parse_expression) 7377 7378 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7379 return ( 7380 self._parse_set_operations( 7381 self._parse_alias(self._parse_assignment(), explicit=True) 7382 if alias 7383 else self._parse_assignment() 7384 ) 7385 or self._parse_select() 7386 ) 7387 7388 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7389 return self._parse_query_modifiers( 7390 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7391 ) 7392 7393 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7394 this = None 7395 if self._match_texts(self.TRANSACTION_KIND): 7396 this = self._prev.text 7397 7398 self._match_texts(("TRANSACTION", "WORK")) 7399 7400 modes = [] 7401 while True: 7402 mode = [] 7403 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7404 mode.append(self._prev.text) 7405 7406 if mode: 7407 modes.append(" ".join(mode)) 7408 if not self._match(TokenType.COMMA): 7409 break 7410 7411 return self.expression(exp.Transaction, this=this, modes=modes) 7412 7413 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7414 chain = None 7415 savepoint = None 7416 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7417 7418 self._match_texts(("TRANSACTION", "WORK")) 7419 7420 if self._match_text_seq("TO"): 7421 self._match_text_seq("SAVEPOINT") 7422 savepoint = self._parse_id_var() 7423 7424 if self._match(TokenType.AND): 7425 chain = not self._match_text_seq("NO") 7426 self._match_text_seq("CHAIN") 7427 7428 if is_rollback: 7429 return self.expression(exp.Rollback, savepoint=savepoint) 7430 7431 return self.expression(exp.Commit, chain=chain) 7432 7433 def _parse_refresh(self) -> exp.Refresh: 7434 self._match(TokenType.TABLE) 7435 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7436 7437 def _parse_column_def_with_exists(self): 7438 start = self._index 7439 self._match(TokenType.COLUMN) 7440 7441 exists_column = self._parse_exists(not_=True) 7442 expression = self._parse_field_def() 7443 7444 if not isinstance(expression, exp.ColumnDef): 7445 self._retreat(start) 7446 return None 7447 7448 expression.set("exists", exists_column) 7449 7450 return expression 7451 7452 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7453 if not self._prev.text.upper() == "ADD": 7454 return None 7455 7456 expression = self._parse_column_def_with_exists() 7457 if not expression: 7458 return None 7459 7460 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7461 if self._match_texts(("FIRST", "AFTER")): 7462 position = self._prev.text 7463 column_position = self.expression( 7464 exp.ColumnPosition, this=self._parse_column(), position=position 7465 ) 7466 expression.set("position", column_position) 7467 7468 return 
expression 7469 7470 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7471 drop = self._match(TokenType.DROP) and self._parse_drop() 7472 if drop and not isinstance(drop, exp.Command): 7473 drop.set("kind", drop.args.get("kind", "COLUMN")) 7474 return drop 7475 7476 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7477 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7478 return self.expression( 7479 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7480 ) 7481 7482 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7483 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7484 self._match_text_seq("ADD") 7485 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7486 return self.expression( 7487 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7488 ) 7489 7490 column_def = self._parse_add_column() 7491 if isinstance(column_def, exp.ColumnDef): 7492 return column_def 7493 7494 exists = self._parse_exists(not_=True) 7495 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7496 return self.expression( 7497 exp.AddPartition, 7498 exists=exists, 7499 this=self._parse_field(any_token=True), 7500 location=self._match_text_seq("LOCATION", advance=False) 7501 and self._parse_property(), 7502 ) 7503 7504 return None 7505 7506 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7507 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7508 or self._match_text_seq("COLUMNS") 7509 ): 7510 schema = self._parse_schema() 7511 7512 return ( 7513 ensure_list(schema) 7514 if schema 7515 else self._parse_csv(self._parse_column_def_with_exists) 7516 ) 7517 7518 return self._parse_csv(_parse_add_alteration) 7519 7520 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7521 if self._match_texts(self.ALTER_ALTER_PARSERS): 7522 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7523 7524 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7525 # keyword after ALTER we default to parsing this statement 7526 self._match(TokenType.COLUMN) 7527 column = self._parse_field(any_token=True) 7528 7529 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7530 return self.expression(exp.AlterColumn, this=column, drop=True) 7531 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7532 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7533 if self._match(TokenType.COMMENT): 7534 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7535 if self._match_text_seq("DROP", "NOT", "NULL"): 7536 return self.expression( 7537 exp.AlterColumn, 7538 this=column, 7539 drop=True, 7540 allow_null=True, 7541 ) 7542 if self._match_text_seq("SET", "NOT", "NULL"): 7543 return self.expression( 7544 exp.AlterColumn, 7545 this=column, 7546 allow_null=False, 7547 ) 7548 7549 if self._match_text_seq("SET", "VISIBLE"): 7550 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7551 if self._match_text_seq("SET", "INVISIBLE"): 7552 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7553 7554 self._match_text_seq("SET", "DATA") 7555 self._match_text_seq("TYPE") 7556 return self.expression( 7557 exp.AlterColumn, 7558 this=column, 7559 dtype=self._parse_types(), 7560 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7561 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7562 ) 7563 7564 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7565 if self._match_texts(("ALL", "EVEN", "AUTO")): 7566 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7567 7568 self._match_text_seq("KEY", "DISTKEY") 7569 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7570 7571 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7572 if compound: 7573 self._match_text_seq("SORTKEY") 7574 7575 if self._match(TokenType.L_PAREN, advance=False): 7576 return self.expression( 7577 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7578 ) 7579 7580 self._match_texts(("AUTO", "NONE")) 7581 return self.expression( 7582 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7583 ) 7584 7585 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7586 index = self._index - 1 7587 7588 partition_exists = self._parse_exists() 7589 if self._match(TokenType.PARTITION, advance=False): 7590 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7591 7592 self._retreat(index) 7593 return self._parse_csv(self._parse_drop_column) 7594 7595 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7596 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7597 exists = self._parse_exists() 7598 old_column = self._parse_column() 7599 to = self._match_text_seq("TO") 7600 new_column = self._parse_column() 7601 7602 if old_column is None or to is None or new_column is None: 7603 return None 7604 7605 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7606 7607 self._match_text_seq("TO") 7608 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7609 7610 def _parse_alter_table_set(self) -> exp.AlterSet: 7611 alter_set = self.expression(exp.AlterSet) 7612 7613 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7614 "TABLE", "PROPERTIES" 7615 ): 7616 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7617 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7618 alter_set.set("expressions", [self._parse_assignment()]) 7619 elif self._match_texts(("LOGGED", "UNLOGGED")): 7620 alter_set.set("option", exp.var(self._prev.text.upper())) 7621 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7622 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7623 elif self._match_text_seq("LOCATION"): 7624 alter_set.set("location", self._parse_field()) 7625 elif self._match_text_seq("ACCESS", "METHOD"): 7626 alter_set.set("access_method", self._parse_field()) 7627 elif self._match_text_seq("TABLESPACE"): 7628 alter_set.set("tablespace", self._parse_field()) 7629 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7630 alter_set.set("file_format", [self._parse_field()]) 7631 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7632 alter_set.set("file_format", self._parse_wrapped_options()) 7633 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7634 alter_set.set("copy_options", self._parse_wrapped_options()) 7635 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7636 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7637 else: 7638 if self._match_text_seq("SERDE"): 7639 alter_set.set("serde", self._parse_field()) 7640 7641 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7642 alter_set.set("expressions", [properties]) 7643 7644 return alter_set 7645 7646 def _parse_alter_session(self) -> exp.AlterSession: 7647 """Parse ALTER SESSION SET/UNSET statements.""" 7648 if self._match(TokenType.SET): 7649 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7650 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7651 7652 self._match_text_seq("UNSET") 7653 expressions = self._parse_csv( 7654 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7655 ) 7656 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7657 7658 def _parse_alter(self) -> exp.Alter | exp.Command: 7659 start = self._prev 7660 7661 alter_token = self._match_set(self.ALTERABLES) and self._prev 7662 if not alter_token: 7663 return self._parse_as_command(start) 7664 7665 exists = self._parse_exists() 7666 only = self._match_text_seq("ONLY") 7667 7668 if alter_token.token_type == TokenType.SESSION: 7669 this = None 7670 check = None 7671 cluster = None 7672 else: 7673 this = self._parse_table(schema=True) 7674 check = self._match_text_seq("WITH", "CHECK") 7675 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7676 7677 if self._next: 7678 self._advance() 7679 7680 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7681 if parser: 7682 actions = ensure_list(parser(self)) 7683 not_valid = self._match_text_seq("NOT", "VALID") 7684 options = self._parse_csv(self._parse_property) 7685 7686 if not self._curr and actions: 7687 return self.expression( 7688 exp.Alter, 7689 this=this, 7690 kind=alter_token.text.upper(), 7691 exists=exists, 7692 actions=actions, 7693 only=only, 7694 options=options, 7695 cluster=cluster, 7696 not_valid=not_valid, 7697 check=check, 7698 ) 7699 7700 return self._parse_as_command(start) 7701 7702 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7703 start = self._prev 7704 # https://duckdb.org/docs/sql/statements/analyze 7705 if not self._curr: 7706 return self.expression(exp.Analyze) 7707 7708 options = [] 7709 while self._match_texts(self.ANALYZE_STYLES): 7710 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7711 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7712 else: 7713 options.append(self._prev.text.upper()) 7714 7715 this: t.Optional[exp.Expression] = None 7716 inner_expression: t.Optional[exp.Expression] = None 7717 7718 kind = self._curr and self._curr.text.upper() 7719 7720 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7721 this = self._parse_table_parts() 7722 elif self._match_text_seq("TABLES"): 7723 if self._match_set((TokenType.FROM, TokenType.IN)): 7724 kind = f"{kind} {self._prev.text.upper()}" 7725 this = self._parse_table(schema=True, is_db_reference=True) 7726 elif self._match_text_seq("DATABASE"): 7727 this = self._parse_table(schema=True, is_db_reference=True) 7728 elif self._match_text_seq("CLUSTER"): 7729 this = self._parse_table() 7730 # Try matching inner expr keywords before fallback to parse table. 
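# A sketch of the overall mapping the branches above and below produce, assuming
# ANALYZE is registered in the base dialect's STATEMENT_PARSERS (as in recent
# releases); illustrative only:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> node = sqlglot.parse_one("ANALYZE TABLE t")
#     >>> isinstance(node, exp.Analyze) and node.args.get("kind")
#     'TABLE'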
7731 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7732 kind = None 7733 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7734 else: 7735 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7736 kind = None 7737 this = self._parse_table_parts() 7738 7739 partition = self._try_parse(self._parse_partition) 7740 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7741 return self._parse_as_command(start) 7742 7743 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7744 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7745 "WITH", "ASYNC", "MODE" 7746 ): 7747 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7748 else: 7749 mode = None 7750 7751 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7752 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7753 7754 properties = self._parse_properties() 7755 return self.expression( 7756 exp.Analyze, 7757 kind=kind, 7758 this=this, 7759 mode=mode, 7760 partition=partition, 7761 properties=properties, 7762 expression=inner_expression, 7763 options=options, 7764 ) 7765 7766 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7767 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7768 this = None 7769 kind = self._prev.text.upper() 7770 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7771 expressions = [] 7772 7773 if not self._match_text_seq("STATISTICS"): 7774 self.raise_error("Expecting token STATISTICS") 7775 7776 if self._match_text_seq("NOSCAN"): 7777 this = "NOSCAN" 7778 elif self._match(TokenType.FOR): 7779 if self._match_text_seq("ALL", "COLUMNS"): 7780 this = "FOR ALL COLUMNS" 7781 if self._match_texts("COLUMNS"): 7782 this = "FOR COLUMNS" 7783 expressions = self._parse_csv(self._parse_column_reference) 7784 elif self._match_text_seq("SAMPLE"): 7785 sample = self._parse_number() 7786 expressions = [ 7787 self.expression( 7788 exp.AnalyzeSample, 7789 sample=sample, 7790 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7791 ) 7792 ] 7793 7794 return self.expression( 7795 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7796 ) 7797 7798 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7799 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7800 kind = None 7801 this = None 7802 expression: t.Optional[exp.Expression] = None 7803 if self._match_text_seq("REF", "UPDATE"): 7804 kind = "REF" 7805 this = "UPDATE" 7806 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7807 this = "UPDATE SET DANGLING TO NULL" 7808 elif self._match_text_seq("STRUCTURE"): 7809 kind = "STRUCTURE" 7810 if self._match_text_seq("CASCADE", "FAST"): 7811 this = "CASCADE FAST" 7812 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7813 ("ONLINE", "OFFLINE") 7814 ): 7815 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7816 expression = self._parse_into() 7817 7818 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7819 7820 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7821 this = self._prev.text.upper() 7822 if self._match_text_seq("COLUMNS"): 7823 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7824 return None 7825 7826 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7827 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7828 if self._match_text_seq("STATISTICS"): 7829 return self.expression(exp.AnalyzeDelete, kind=kind) 7830 return None 7831 7832 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7833 if self._match_text_seq("CHAINED", "ROWS"): 7834 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7835 return None 7836 7837 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7838 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7839 this = self._prev.text.upper() 7840 expression: t.Optional[exp.Expression] = None 7841 expressions = [] 7842 update_options = None 7843 7844 if self._match_text_seq("HISTOGRAM", "ON"): 7845 expressions = self._parse_csv(self._parse_column_reference) 7846 with_expressions = [] 7847 while self._match(TokenType.WITH): 7848 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7849 if self._match_texts(("SYNC", "ASYNC")): 7850 if self._match_text_seq("MODE", advance=False): 7851 with_expressions.append(f"{self._prev.text.upper()} MODE") 7852 self._advance() 7853 else: 7854 buckets = self._parse_number() 7855 if self._match_text_seq("BUCKETS"): 7856 with_expressions.append(f"{buckets} BUCKETS") 7857 if with_expressions: 7858 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7859 7860 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7861 TokenType.UPDATE, advance=False 7862 ): 7863 update_options = self._prev.text.upper() 7864 self._advance() 7865 elif self._match_text_seq("USING", "DATA"): 7866 expression = self.expression(exp.UsingData, this=self._parse_string()) 7867 7868 return self.expression( 7869 exp.AnalyzeHistogram, 7870 this=this, 7871 expressions=expressions, 7872 expression=expression, 7873 update_options=update_options, 7874 ) 7875 7876 def _parse_merge(self) -> exp.Merge: 7877 self._match(TokenType.INTO) 7878 target = self._parse_table() 7879 7880 if target and self._match(TokenType.ALIAS, advance=False): 7881 target.set("alias", self._parse_table_alias()) 7882 7883 self._match(TokenType.USING) 7884 using = self._parse_table() 7885 7886 self._match(TokenType.ON) 7887 on = self._parse_assignment() 7888 7889 return self.expression( 7890 exp.Merge, 7891 this=target, 7892 using=using, 7893 on=on, 7894 whens=self._parse_when_matched(), 7895 returning=self._parse_returning(), 7896 ) 7897 7898 def _parse_when_matched(self) -> exp.Whens: 7899 whens = [] 7900 7901 while self._match(TokenType.WHEN): 7902 matched = not self._match(TokenType.NOT) 7903 self._match_text_seq("MATCHED") 7904 source = ( 7905 False 7906 if self._match_text_seq("BY", "TARGET") 7907 else self._match_text_seq("BY", "SOURCE") 7908 ) 7909 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7910 7911 self._match(TokenType.THEN) 7912 7913 if self._match(TokenType.INSERT): 7914 this = self._parse_star() 7915 if this: 7916 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7917 else: 7918 then = self.expression( 7919 exp.Insert, 7920 this=exp.var("ROW") 7921 if self._match_text_seq("ROW") 7922 else self._parse_value(values=False), 7923 expression=self._match_text_seq("VALUES") and self._parse_value(), 7924 ) 7925 elif self._match(TokenType.UPDATE): 7926 expressions = self._parse_star() 7927 if expressions: 7928 then = self.expression(exp.Update, expressions=expressions) 7929 else: 7930 then = self.expression( 7931 exp.Update, 7932 
expressions=self._match(TokenType.SET) 7933 and self._parse_csv(self._parse_equality), 7934 ) 7935 elif self._match(TokenType.DELETE): 7936 then = self.expression(exp.Var, this=self._prev.text) 7937 else: 7938 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7939 7940 whens.append( 7941 self.expression( 7942 exp.When, 7943 matched=matched, 7944 source=source, 7945 condition=condition, 7946 then=then, 7947 ) 7948 ) 7949 return self.expression(exp.Whens, expressions=whens) 7950 7951 def _parse_show(self) -> t.Optional[exp.Expression]: 7952 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7953 if parser: 7954 return parser(self) 7955 return self._parse_as_command(self._prev) 7956 7957 def _parse_set_item_assignment( 7958 self, kind: t.Optional[str] = None 7959 ) -> t.Optional[exp.Expression]: 7960 index = self._index 7961 7962 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7963 return self._parse_set_transaction(global_=kind == "GLOBAL") 7964 7965 left = self._parse_primary() or self._parse_column() 7966 assignment_delimiter = self._match_texts(("=", "TO")) 7967 7968 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7969 self._retreat(index) 7970 return None 7971 7972 right = self._parse_statement() or self._parse_id_var() 7973 if isinstance(right, (exp.Column, exp.Identifier)): 7974 right = exp.var(right.name) 7975 7976 this = self.expression(exp.EQ, this=left, expression=right) 7977 return self.expression(exp.SetItem, this=this, kind=kind) 7978 7979 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7980 self._match_text_seq("TRANSACTION") 7981 characteristics = self._parse_csv( 7982 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7983 ) 7984 return self.expression( 7985 exp.SetItem, 7986 expressions=characteristics, 7987 kind="TRANSACTION", 7988 **{"global": global_}, # type: ignore 7989 ) 7990 7991 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7992 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7993 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7994 7995 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7996 index = self._index 7997 set_ = self.expression( 7998 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7999 ) 8000 8001 if self._curr: 8002 self._retreat(index) 8003 return self._parse_as_command(self._prev) 8004 8005 return set_ 8006 8007 def _parse_var_from_options( 8008 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8009 ) -> t.Optional[exp.Var]: 8010 start = self._curr 8011 if not start: 8012 return None 8013 8014 option = start.text.upper() 8015 continuations = options.get(option) 8016 8017 index = self._index 8018 self._advance() 8019 for keywords in continuations or []: 8020 if isinstance(keywords, str): 8021 keywords = (keywords,) 8022 8023 if self._match_text_seq(*keywords): 8024 option = f"{option} {' '.join(keywords)}" 8025 break 8026 else: 8027 if continuations or continuations is None: 8028 if raise_unmatched: 8029 self.raise_error(f"Unknown option {option}") 8030 8031 self._retreat(index) 8032 return None 8033 8034 return exp.var(option) 8035 8036 def _parse_as_command(self, start: Token) -> exp.Command: 8037 while self._curr: 8038 self._advance() 8039 text = self._find_sql(start, self._prev) 8040 size = len(start.text) 8041 self._warn_unsupported() 8042 return exp.Command(this=text[:size], 
expression=text[size:]) 8043 8044 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8045 settings = [] 8046 8047 self._match_l_paren() 8048 kind = self._parse_id_var() 8049 8050 if self._match(TokenType.L_PAREN): 8051 while True: 8052 key = self._parse_id_var() 8053 value = self._parse_primary() 8054 if not key and value is None: 8055 break 8056 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8057 self._match(TokenType.R_PAREN) 8058 8059 self._match_r_paren() 8060 8061 return self.expression( 8062 exp.DictProperty, 8063 this=this, 8064 kind=kind.this if kind else None, 8065 settings=settings, 8066 ) 8067 8068 def _parse_dict_range(self, this: str) -> exp.DictRange: 8069 self._match_l_paren() 8070 has_min = self._match_text_seq("MIN") 8071 if has_min: 8072 min = self._parse_var() or self._parse_primary() 8073 self._match_text_seq("MAX") 8074 max = self._parse_var() or self._parse_primary() 8075 else: 8076 max = self._parse_var() or self._parse_primary() 8077 min = exp.Literal.number(0) 8078 self._match_r_paren() 8079 return self.expression(exp.DictRange, this=this, min=min, max=max) 8080 8081 def _parse_comprehension( 8082 self, this: t.Optional[exp.Expression] 8083 ) -> t.Optional[exp.Comprehension]: 8084 index = self._index 8085 expression = self._parse_column() 8086 if not self._match(TokenType.IN): 8087 self._retreat(index - 1) 8088 return None 8089 iterator = self._parse_column() 8090 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8091 return self.expression( 8092 exp.Comprehension, 8093 this=this, 8094 expression=expression, 8095 iterator=iterator, 8096 condition=condition, 8097 ) 8098 8099 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8100 if self._match(TokenType.HEREDOC_STRING): 8101 return self.expression(exp.Heredoc, this=self._prev.text) 8102 8103 if not self._match_text_seq("$"): 8104 return None 8105 8106 tags = ["$"] 8107 tag_text = None 8108 8109 if self._is_connected(): 8110 self._advance() 8111 tags.append(self._prev.text.upper()) 8112 else: 8113 self.raise_error("No closing $ found") 8114 8115 if tags[-1] != "$": 8116 if self._is_connected() and self._match_text_seq("$"): 8117 tag_text = tags[-1] 8118 tags.append("$") 8119 else: 8120 self.raise_error("No closing $ found") 8121 8122 heredoc_start = self._curr 8123 8124 while self._curr: 8125 if self._match_text_seq(*tags, advance=False): 8126 this = self._find_sql(heredoc_start, self._prev) 8127 self._advance(len(tags)) 8128 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8129 8130 self._advance() 8131 8132 self.raise_error(f"No closing {''.join(tags)} found") 8133 return None 8134 8135 def _find_parser( 8136 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8137 ) -> t.Optional[t.Callable]: 8138 if not self._curr: 8139 return None 8140 8141 index = self._index 8142 this = [] 8143 while True: 8144 # The current token might be multiple words 8145 curr = self._curr.text.upper() 8146 key = curr.split(" ") 8147 this.append(curr) 8148 8149 self._advance() 8150 result, trie = in_trie(trie, key) 8151 if result == TrieResult.FAILED: 8152 break 8153 8154 if result == TrieResult.EXISTS: 8155 subparser = parsers[" ".join(this)] 8156 return subparser 8157 8158 self._retreat(index) 8159 return None 8160 8161 def _match(self, token_type, advance=True, expression=None): 8162 if not self._curr: 8163 return None 8164 8165 if self._curr.token_type == token_type: 8166 if advance: 8167 self._advance() 8168 self._add_comments(expression) 8169 return 
True 8170 8171 return None 8172 8173 def _match_set(self, types, advance=True): 8174 if not self._curr: 8175 return None 8176 8177 if self._curr.token_type in types: 8178 if advance: 8179 self._advance() 8180 return True 8181 8182 return None 8183 8184 def _match_pair(self, token_type_a, token_type_b, advance=True): 8185 if not self._curr or not self._next: 8186 return None 8187 8188 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8189 if advance: 8190 self._advance(2) 8191 return True 8192 8193 return None 8194 8195 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8196 if not self._match(TokenType.L_PAREN, expression=expression): 8197 self.raise_error("Expecting (") 8198 8199 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8200 if not self._match(TokenType.R_PAREN, expression=expression): 8201 self.raise_error("Expecting )") 8202 8203 def _match_texts(self, texts, advance=True): 8204 if ( 8205 self._curr 8206 and self._curr.token_type != TokenType.STRING 8207 and self._curr.text.upper() in texts 8208 ): 8209 if advance: 8210 self._advance() 8211 return True 8212 return None 8213 8214 def _match_text_seq(self, *texts, advance=True): 8215 index = self._index 8216 for text in texts: 8217 if ( 8218 self._curr 8219 and self._curr.token_type != TokenType.STRING 8220 and self._curr.text.upper() == text 8221 ): 8222 self._advance() 8223 else: 8224 self._retreat(index) 8225 return None 8226 8227 if not advance: 8228 self._retreat(index) 8229 8230 return True 8231 8232 def _replace_lambda( 8233 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8234 ) -> t.Optional[exp.Expression]: 8235 if not node: 8236 return node 8237 8238 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8239 8240 for column in node.find_all(exp.Column): 8241 typ = lambda_types.get(column.parts[0].name) 8242 if typ is not None: 8243 dot_or_id = column.to_dot() if column.table else column.this 8244 8245 if typ: 8246 dot_or_id = self.expression( 8247 exp.Cast, 8248 this=dot_or_id, 8249 to=typ, 8250 ) 8251 8252 parent = column.parent 8253 8254 while isinstance(parent, exp.Dot): 8255 if not isinstance(parent.parent, exp.Dot): 8256 parent.replace(dot_or_id) 8257 break 8258 parent = parent.parent 8259 else: 8260 if column is node: 8261 node = dot_or_id 8262 else: 8263 column.replace(dot_or_id) 8264 return node 8265 8266 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8267 start = self._prev 8268 8269 # Not to be confused with TRUNCATE(number, decimals) function call 8270 if self._match(TokenType.L_PAREN): 8271 self._retreat(self._index - 2) 8272 return self._parse_function() 8273 8274 # Clickhouse supports TRUNCATE DATABASE as well 8275 is_database = self._match(TokenType.DATABASE) 8276 8277 self._match(TokenType.TABLE) 8278 8279 exists = self._parse_exists(not_=False) 8280 8281 expressions = self._parse_csv( 8282 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8283 ) 8284 8285 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8286 8287 if self._match_text_seq("RESTART", "IDENTITY"): 8288 identity = "RESTART" 8289 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8290 identity = "CONTINUE" 8291 else: 8292 identity = None 8293 8294 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8295 option = self._prev.text 8296 else: 8297 option = None 8298 8299 partition = self._parse_partition() 
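# A hedged doctest-style sketch of the happy path, assuming the documented
# `sqlglot.parse_one` entry point (not part of the original source):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> node = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY", read="postgres")
#     >>> isinstance(node, exp.TruncateTable) and node.args.get("identity")
#     'RESTART'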
8300 8301 # Fallback case 8302 if self._curr: 8303 return self._parse_as_command(start) 8304 8305 return self.expression( 8306 exp.TruncateTable, 8307 expressions=expressions, 8308 is_database=is_database, 8309 exists=exists, 8310 cluster=cluster, 8311 identity=identity, 8312 option=option, 8313 partition=partition, 8314 ) 8315 8316 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8317 this = self._parse_ordered(self._parse_opclass) 8318 8319 if not self._match(TokenType.WITH): 8320 return this 8321 8322 op = self._parse_var(any_token=True) 8323 8324 return self.expression(exp.WithOperator, this=this, op=op) 8325 8326 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8327 self._match(TokenType.EQ) 8328 self._match(TokenType.L_PAREN) 8329 8330 opts: t.List[t.Optional[exp.Expression]] = [] 8331 option: exp.Expression | None 8332 while self._curr and not self._match(TokenType.R_PAREN): 8333 if self._match_text_seq("FORMAT_NAME", "="): 8334 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8335 option = self._parse_format_name() 8336 else: 8337 option = self._parse_property() 8338 8339 if option is None: 8340 self.raise_error("Unable to parse option") 8341 break 8342 8343 opts.append(option) 8344 8345 return opts 8346 8347 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8348 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8349 8350 options = [] 8351 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8352 option = self._parse_var(any_token=True) 8353 prev = self._prev.text.upper() 8354 8355 # Different dialects might separate options and values by white space, "=" and "AS" 8356 self._match(TokenType.EQ) 8357 self._match(TokenType.ALIAS) 8358 8359 param = self.expression(exp.CopyParameter, this=option) 8360 8361 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8362 TokenType.L_PAREN, advance=False 8363 ): 8364 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8365 param.set("expressions", self._parse_wrapped_options()) 8366 elif prev == "FILE_FORMAT": 8367 # T-SQL's external file format case 8368 param.set("expression", self._parse_field()) 8369 else: 8370 param.set("expression", self._parse_unquoted_field()) 8371 8372 options.append(param) 8373 self._match(sep) 8374 8375 return options 8376 8377 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8378 expr = self.expression(exp.Credentials) 8379 8380 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8381 expr.set("storage", self._parse_field()) 8382 if self._match_text_seq("CREDENTIALS"): 8383 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8384 creds = ( 8385 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8386 ) 8387 expr.set("credentials", creds) 8388 if self._match_text_seq("ENCRYPTION"): 8389 expr.set("encryption", self._parse_wrapped_options()) 8390 if self._match_text_seq("IAM_ROLE"): 8391 expr.set("iam_role", self._parse_field()) 8392 if self._match_text_seq("REGION"): 8393 expr.set("region", self._parse_field()) 8394 8395 return expr 8396 8397 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8398 return self._parse_field() 8399 8400 def _parse_copy(self) -> exp.Copy | exp.Command: 8401 start = self._prev 8402 8403 self._match(TokenType.INTO) 8404 8405 this = ( 8406 self._parse_select(nested=True, parse_subquery_alias=False) 8407 if self._match(TokenType.L_PAREN, advance=False) 8408 else self._parse_table(schema=True) 
8409 ) 8410 8411 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8412 8413 files = self._parse_csv(self._parse_file_location) 8414 if self._match(TokenType.EQ, advance=False): 8415 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8416 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8417 # list via `_parse_wrapped(..)` below. 8418 self._advance(-1) 8419 files = [] 8420 8421 credentials = self._parse_credentials() 8422 8423 self._match_text_seq("WITH") 8424 8425 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8426 8427 # Fallback case 8428 if self._curr: 8429 return self._parse_as_command(start) 8430 8431 return self.expression( 8432 exp.Copy, 8433 this=this, 8434 kind=kind, 8435 credentials=credentials, 8436 files=files, 8437 params=params, 8438 ) 8439 8440 def _parse_normalize(self) -> exp.Normalize: 8441 return self.expression( 8442 exp.Normalize, 8443 this=self._parse_bitwise(), 8444 form=self._match(TokenType.COMMA) and self._parse_var(), 8445 ) 8446 8447 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8448 args = self._parse_csv(lambda: self._parse_lambda()) 8449 8450 this = seq_get(args, 0) 8451 decimals = seq_get(args, 1) 8452 8453 return expr_type( 8454 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8455 ) 8456 8457 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8458 star_token = self._prev 8459 8460 if self._match_text_seq("COLUMNS", "(", advance=False): 8461 this = self._parse_function() 8462 if isinstance(this, exp.Columns): 8463 this.set("unpack", True) 8464 return this 8465 8466 return self.expression( 8467 exp.Star, 8468 **{ # type: ignore 8469 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8470 "replace": self._parse_star_op("REPLACE"), 8471 "rename": self._parse_star_op("RENAME"), 8472 }, 8473 ).update_positions(star_token) 8474 8475 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8476 privilege_parts = [] 8477 8478 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8479 # (end of privilege list) or L_PAREN (start of column list) are met 8480 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8481 privilege_parts.append(self._curr.text.upper()) 8482 self._advance() 8483 8484 this = exp.var(" ".join(privilege_parts)) 8485 expressions = ( 8486 self._parse_wrapped_csv(self._parse_column) 8487 if self._match(TokenType.L_PAREN, advance=False) 8488 else None 8489 ) 8490 8491 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8492 8493 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8494 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8495 principal = self._parse_id_var() 8496 8497 if not principal: 8498 return None 8499 8500 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8501 8502 def _parse_grant_revoke_common( 8503 self, 8504 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8505 privileges = self._parse_csv(self._parse_grant_privilege) 8506 8507 self._match(TokenType.ON) 8508 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8509 8510 # Attempt to parse the securable e.g. 
MySQL allows names 8511 # such as "foo.*", "*.*" which are not easily parseable yet 8512 securable = self._try_parse(self._parse_table_parts) 8513 8514 return privileges, kind, securable 8515 8516 def _parse_grant(self) -> exp.Grant | exp.Command: 8517 start = self._prev 8518 8519 privileges, kind, securable = self._parse_grant_revoke_common() 8520 8521 if not securable or not self._match_text_seq("TO"): 8522 return self._parse_as_command(start) 8523 8524 principals = self._parse_csv(self._parse_grant_principal) 8525 8526 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8527 8528 if self._curr: 8529 return self._parse_as_command(start) 8530 8531 return self.expression( 8532 exp.Grant, 8533 privileges=privileges, 8534 kind=kind, 8535 securable=securable, 8536 principals=principals, 8537 grant_option=grant_option, 8538 ) 8539 8540 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8541 start = self._prev 8542 8543 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8544 8545 privileges, kind, securable = self._parse_grant_revoke_common() 8546 8547 if not securable or not self._match_text_seq("FROM"): 8548 return self._parse_as_command(start) 8549 8550 principals = self._parse_csv(self._parse_grant_principal) 8551 8552 cascade = None 8553 if self._match_texts(("CASCADE", "RESTRICT")): 8554 cascade = self._prev.text.upper() 8555 8556 if self._curr: 8557 return self._parse_as_command(start) 8558 8559 return self.expression( 8560 exp.Revoke, 8561 privileges=privileges, 8562 kind=kind, 8563 securable=securable, 8564 principals=principals, 8565 grant_option=grant_option, 8566 cascade=cascade, 8567 ) 8568 8569 def _parse_overlay(self) -> exp.Overlay: 8570 return self.expression( 8571 exp.Overlay, 8572 **{ # type: ignore 8573 "this": self._parse_bitwise(), 8574 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8575 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8576 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8577 }, 8578 ) 8579 8580 def _parse_format_name(self) -> exp.Property: 8581 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8582 # for FILE_FORMAT = <format_name> 8583 return self.expression( 8584 exp.Property, 8585 this=exp.var("FORMAT_NAME"), 8586 value=self._parse_string() or self._parse_table_parts(), 8587 ) 8588 8589 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8590 args: t.List[exp.Expression] = [] 8591 8592 if self._match(TokenType.DISTINCT): 8593 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8594 self._match(TokenType.COMMA) 8595 8596 args.extend(self._parse_csv(self._parse_assignment)) 8597 8598 return self.expression( 8599 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8600 ) 8601 8602 def _identifier_expression( 8603 self, token: t.Optional[Token] = None, **kwargs: t.Any 8604 ) -> exp.Identifier: 8605 token = token or self._prev 8606 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8607 expression.update_positions(token) 8608 return expression 8609 8610 def _build_pipe_cte( 8611 self, 8612 query: exp.Query, 8613 expressions: t.List[exp.Expression], 8614 alias_cte: t.Optional[exp.TableAlias] = None, 8615 ) -> exp.Select: 8616 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8617 if alias_cte: 8618 new_cte = alias_cte 8619 else: 8620 self._pipe_cte_counter += 1 8621 new_cte = f"__tmp{self._pipe_cte_counter}" 8622 8623 with_ = 
query.args.get("with") 8624 ctes = with_.pop() if with_ else None 8625 8626 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8627 if ctes: 8628 new_select.set("with", ctes) 8629 8630 return new_select.with_(new_cte, as_=query, copy=False) 8631 8632 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8633 select = self._parse_select(consume_pipe=False) 8634 if not select: 8635 return query 8636 8637 return self._build_pipe_cte( 8638 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8639 ) 8640 8641 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8642 limit = self._parse_limit() 8643 offset = self._parse_offset() 8644 if limit: 8645 curr_limit = query.args.get("limit", limit) 8646 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8647 query.limit(limit, copy=False) 8648 if offset: 8649 curr_offset = query.args.get("offset") 8650 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8651 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8652 8653 return query 8654 8655 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8656 this = self._parse_assignment() 8657 if self._match_text_seq("GROUP", "AND", advance=False): 8658 return this 8659 8660 this = self._parse_alias(this) 8661 8662 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8663 return self._parse_ordered(lambda: this) 8664 8665 return this 8666 8667 def _parse_pipe_syntax_aggregate_group_order_by( 8668 self, query: exp.Select, group_by_exists: bool = True 8669 ) -> exp.Select: 8670 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8671 aggregates_or_groups, orders = [], [] 8672 for element in expr: 8673 if isinstance(element, exp.Ordered): 8674 this = element.this 8675 if isinstance(this, exp.Alias): 8676 element.set("this", this.args["alias"]) 8677 orders.append(element) 8678 else: 8679 this = element 8680 aggregates_or_groups.append(this) 8681 8682 if group_by_exists: 8683 query.select(*aggregates_or_groups, copy=False).group_by( 8684 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8685 copy=False, 8686 ) 8687 else: 8688 query.select(*aggregates_or_groups, append=False, copy=False) 8689 8690 if orders: 8691 return query.order_by(*orders, append=False, copy=False) 8692 8693 return query 8694 8695 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8696 self._match_text_seq("AGGREGATE") 8697 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8698 8699 if self._match(TokenType.GROUP_BY) or ( 8700 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8701 ): 8702 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8703 8704 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8705 8706 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8707 first_setop = self.parse_set_operation(this=query) 8708 if not first_setop: 8709 return None 8710 8711 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8712 expr = self._parse_paren() 8713 return expr.assert_is(exp.Subquery).unnest() if expr else None 8714 8715 first_setop.this.pop() 8716 8717 setops = [ 8718 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8719 *self._parse_csv(_parse_and_unwrap_query), 8720 ] 8721 8722 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8723 
with_ = query.args.get("with") 8724 ctes = with_.pop() if with_ else None 8725 8726 if isinstance(first_setop, exp.Union): 8727 query = query.union(*setops, copy=False, **first_setop.args) 8728 elif isinstance(first_setop, exp.Except): 8729 query = query.except_(*setops, copy=False, **first_setop.args) 8730 else: 8731 query = query.intersect(*setops, copy=False, **first_setop.args) 8732 8733 query.set("with", ctes) 8734 8735 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8736 8737 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8738 join = self._parse_join() 8739 if not join: 8740 return None 8741 8742 if isinstance(query, exp.Select): 8743 return query.join(join, copy=False) 8744 8745 return query 8746 8747 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8748 pivots = self._parse_pivots() 8749 if not pivots: 8750 return query 8751 8752 from_ = query.args.get("from") 8753 if from_: 8754 from_.this.set("pivots", pivots) 8755 8756 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8757 8758 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8759 self._match_text_seq("EXTEND") 8760 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8761 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8762 8763 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8764 sample = self._parse_table_sample() 8765 8766 with_ = query.args.get("with") 8767 if with_: 8768 with_.expressions[-1].this.set("sample", sample) 8769 else: 8770 query.set("sample", sample) 8771 8772 return query 8773 8774 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8775 if isinstance(query, exp.Subquery): 8776 query = exp.select("*").from_(query, copy=False) 8777 8778 if not query.args.get("from"): 8779 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8780 8781 while self._match(TokenType.PIPE_GT): 8782 start = self._curr 8783 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8784 if not parser: 8785 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8786 # keywords, making it tricky to disambiguate them without lookahead. The approach 8787 # here is to try and parse a set operation and if that fails, then try to parse a 8788 # join operator. If that fails as well, then the operator is not supported. 
8789 parsed_query = self._parse_pipe_syntax_set_operator(query) 8790 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8791 if not parsed_query: 8792 self._retreat(start) 8793 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8794 break 8795 query = parsed_query 8796 else: 8797 query = parser(self, query) 8798 8799 return query 8800 8801 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8802 vars = self._parse_csv(self._parse_id_var) 8803 if not vars: 8804 return None 8805 8806 return self.expression( 8807 exp.DeclareItem, 8808 this=vars, 8809 kind=self._parse_types(), 8810 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8811 ) 8812 8813 def _parse_declare(self) -> exp.Declare | exp.Command: 8814 start = self._prev 8815 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8816 8817 if not expressions or self._curr: 8818 return self._parse_as_command(start) 8819 8820 return self.expression(exp.Declare, expressions=expressions) 8821 8822 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8823 exp_class = exp.Cast if strict else exp.TryCast 8824 8825 if exp_class == exp.TryCast: 8826 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8827 8828 return self.expression(exp_class, **kwargs) 8829 8830 def _parse_json_value(self) -> exp.JSONValue: 8831 this = self._parse_bitwise() 8832 self._match(TokenType.COMMA) 8833 path = self._parse_bitwise() 8834 8835 returning = self._match(TokenType.RETURNING) and self._parse_type() 8836 8837 return self.expression( 8838 exp.JSONValue, 8839 this=this, 8840 path=self.dialect.to_json_path(path), 8841 returning=returning, 8842 on_condition=self._parse_on_condition(), 8843 ) 8844 8845 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8846 def concat_exprs( 8847 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8848 ) -> exp.Expression: 8849 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8850 concat_exprs = [ 8851 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8852 ] 8853 node.set("expressions", concat_exprs) 8854 return node 8855 if len(exprs) == 1: 8856 return exprs[0] 8857 return self.expression(exp.Concat, expressions=args, safe=True) 8858 8859 args = self._parse_csv(self._parse_lambda) 8860 8861 if args: 8862 order = args[-1] if isinstance(args[-1], exp.Order) else None 8863 8864 if order: 8865 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8866 # remove 'expr' from exp.Order and add it back to args 8867 args[-1] = order.this 8868 order.set("this", concat_exprs(order.this, args)) 8869 8870 this = order or concat_exprs(args[0], args) 8871 else: 8872 this = None 8873 8874 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8875 8876 return self.expression(exp.GroupConcat, this=this, separator=separator)
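The pipe-syntax helpers above (_parse_pipe_syntax_query and the _parse_pipe_syntax_* operators) rewrite each |> stage into a CTE over the preceding query via _build_pipe_cte. A minimal sketch of the observable behavior, assuming a sqlglot version whose BigQuery dialect enables pipe syntax:

    import sqlglot

    # Each |> stage is folded into a generated CTE (__tmp1, __tmp2, ...) that wraps
    # the query built so far, so later stages can select from the earlier ones.
    ast = sqlglot.parse_one("SELECT * FROM t |> WHERE x > 1 |> SELECT x", read="bigquery")
    print(ast.sql("bigquery"))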
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
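A minimal construction sketch, driving Tokenizer and Parser directly rather than going through the higher-level sqlglot.parse entry point:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Accumulate errors and raise them together, with 50 characters of context
    # around each offending token and at most 5 errors per ParseError.
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
    expressions = parser.parse(tokens, sql)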
1584 def __init__( 1585 self, 1586 error_level: t.Optional[ErrorLevel] = None, 1587 error_message_context: int = 100, 1588 max_errors: int = 3, 1589 dialect: DialectType = None, 1590 ): 1591 from sqlglot.dialects import Dialect 1592 1593 self.error_level = error_level or ErrorLevel.IMMEDIATE 1594 self.error_message_context = error_message_context 1595 self.max_errors = max_errors 1596 self.dialect = Dialect.get_or_raise(dialect) 1597 self.reset()
1610 def parse( 1611 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1612 ) -> t.List[t.Optional[exp.Expression]]: 1613 """ 1614 Parses a list of tokens and returns a list of syntax trees, one tree 1615 per parsed SQL statement. 1616 1617 Args: 1618 raw_tokens: The list of tokens. 1619 sql: The original SQL string, used to produce helpful debug messages. 1620 1621 Returns: 1622 The list of the produced syntax trees. 1623 """ 1624 return self._parse( 1625 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1626 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
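For example, a sketch using the default dialect:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    assert len(trees) == 2  # one syntax tree per statement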
1628 def parse_into( 1629 self, 1630 expression_types: exp.IntoType, 1631 raw_tokens: t.List[Token], 1632 sql: t.Optional[str] = None, 1633 ) -> t.List[t.Optional[exp.Expression]]: 1634 """ 1635 Parses a list of tokens into a given Expression type. If a collection of Expression 1636 types is given instead, this method will try to parse the token list into each one 1637 of them, stopping at the first for which the parsing succeeds. 1638 1639 Args: 1640 expression_types: The expression type(s) to try and parse the token list into. 1641 raw_tokens: The list of tokens. 1642 sql: The original SQL string, used to produce helpful debug messages. 1643 1644 Returns: 1645 The target Expression. 1646 """ 1647 errors = [] 1648 for expression_type in ensure_list(expression_types): 1649 parser = self.EXPRESSION_PARSERS.get(expression_type) 1650 if not parser: 1651 raise TypeError(f"No parser registered for {expression_type}") 1652 1653 try: 1654 return self._parse(parser, raw_tokens, sql) 1655 except ParseError as e: 1656 e.errors[0]["into_expression"] = expression_type 1657 errors.append(e) 1658 1659 raise ParseError( 1660 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1661 errors=merge_errors(errors), 1662 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
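A usage sketch; exp.Condition is assumed to be registered in EXPRESSION_PARSERS, as it is for the base parser:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x = 1 AND y = 2"
    # Parse the tokens as a standalone condition rather than a full statement.
    condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)[0]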
1702 def check_errors(self) -> None: 1703 """Logs or raises any found errors, depending on the chosen error level setting.""" 1704 if self.error_level == ErrorLevel.WARN: 1705 for error in self.errors: 1706 logger.error(str(error)) 1707 elif self.error_level == ErrorLevel.RAISE and self.errors: 1708 raise ParseError( 1709 concat_messages(self.errors, self.max_errors), 1710 errors=merge_errors(self.errors), 1711 )
Logs or raises any found errors, depending on the chosen error level setting.
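check_errors runs at the end of each parse, so the error level chosen at construction time decides whether malformed input is logged or raised. A sketch, assuming the trailing ')' leaves an unconsumed token:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 )"  # malformed: the trailing ')' cannot be consumed
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)  # errors are logged, not raised
    print(parser.errors)  # the accumulated ParseError instances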
1713 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1714 """ 1715 Appends an error in the list of recorded errors or raises it, depending on the chosen 1716 error level setting. 1717 """ 1718 token = token or self._curr or self._prev or Token.string("") 1719 start = token.start 1720 end = token.end + 1 1721 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1722 highlight = self.sql[start:end] 1723 end_context = self.sql[end : end + self.error_message_context] 1724 1725 error = ParseError.new( 1726 f"{message}. Line {token.line}, Col: {token.col}.\n" 1727 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1728 description=message, 1729 line=token.line, 1730 col=token.col, 1731 start_context=start_context, 1732 highlight=highlight, 1733 end_context=end_context, 1734 ) 1735 1736 if self.error_level == ErrorLevel.IMMEDIATE: 1737 raise error 1738 1739 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
1741 def expression( 1742 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1743 ) -> E: 1744 """ 1745 Creates a new, validated Expression. 1746 1747 Args: 1748 exp_class: The expression class to instantiate. 1749 comments: An optional list of comments to attach to the expression. 1750 kwargs: The arguments to set for the expression along with their respective values. 1751 1752 Returns: 1753 The target expression. 1754 """ 1755 instance = exp_class(**kwargs) 1756 instance.add_comments(comments) if comments else self._add_comments(instance) 1757 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
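Parser code builds nodes through this helper instead of instantiating expression classes directly, so that pending comments get attached and mandatory arguments are validated. A standalone sketch:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Equivalent to what parsing code does internally: create a node and validate it.
    node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1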
1764 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1765 """ 1766 Validates an Expression, making sure that all its mandatory arguments are set. 1767 1768 Args: 1769 expression: The expression to validate. 1770 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1771 1772 Returns: 1773 The validated expression. 1774 """ 1775 if self.error_level != ErrorLevel.IGNORE: 1776 for error_message in expression.error_messages(args): 1777 self.raise_error(error_message) 1778 1779 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
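A sketch of the failure path: exp.EQ declares both this and expression as mandatory, so leaving one unset records an error (with ErrorLevel.WARN the error is collected rather than raised):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.WARN)
    incomplete = exp.EQ(this=exp.column("a"))  # missing the right-hand side
    parser.validate_expression(incomplete)
    print(parser.errors)  # e.g. a "Required keyword: 'expression' missing ..." error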
4862 def parse_set_operation( 4863 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4864 ) -> t.Optional[exp.Expression]: 4865 start = self._index 4866 _, side_token, kind_token = self._parse_join_parts() 4867 4868 side = side_token.text if side_token else None 4869 kind = kind_token.text if kind_token else None 4870 4871 if not self._match_set(self.SET_OPERATIONS): 4872 self._retreat(start) 4873 return None 4874 4875 token_type = self._prev.token_type 4876 4877 if token_type == TokenType.UNION: 4878 operation: t.Type[exp.SetOperation] = exp.Union 4879 elif token_type == TokenType.EXCEPT: 4880 operation = exp.Except 4881 else: 4882 operation = exp.Intersect 4883 4884 comments = self._prev.comments 4885 4886 if self._match(TokenType.DISTINCT): 4887 distinct: t.Optional[bool] = True 4888 elif self._match(TokenType.ALL): 4889 distinct = False 4890 else: 4891 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4892 if distinct is None: 4893 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4894 4895 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4896 "STRICT", "CORRESPONDING" 4897 ) 4898 if self._match_text_seq("CORRESPONDING"): 4899 by_name = True 4900 if not side and not kind: 4901 kind = "INNER" 4902 4903 on_column_list = None 4904 if by_name and self._match_texts(("ON", "BY")): 4905 on_column_list = self._parse_wrapped_csv(self._parse_column) 4906 4907 expression = self._parse_select( 4908 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4909 ) 4910 4911 return self.expression( 4912 operation, 4913 comments=comments, 4914 this=this, 4915 distinct=distinct, 4916 by_name=by_name, 4917 expression=expression, 4918 side=side, 4919 kind=kind, 4920 on=on_column_list, 4921 )
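parse_set_operation is also reachable through the public API; a quick sketch of the shape it produces:

    import sqlglot
    from sqlglot import exp

    # UNION ALL parses into an exp.Union whose this/expression hold the operands
    # and whose distinct arg is False because ALL was given.
    tree = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    assert isinstance(tree, exp.Union)
    assert tree.args["distinct"] is False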